*******************************************************************************
*   Title: programs_final.do
* Purpose: Helper programs for draft file draft_final.do
*   Owner: EZ
*    Date: 2022-01-12
*******************************************************************************

*******************************************************************************
* Helpers and load_analysis_data()
*******************************************************************************
capture program drop set_env_ds/*%<*/
program define set_env_ds

    /* We've numbered different computations of individuals' wealth in the tax
        data. Store in global macros a number of these
        specifications which we use for the purpose of the exhibits in the 
        paper. */
    global sz_wlth_defn = 20 // Specification which matches Saez-Zucman

    * Environment as of 20220128
    * 3215 for 1966-1979 
    * 3115 for 1980-2000 
    * 3016 for 2001-2016 
    global preferred_defn_early = 3215
    global preferred_defn_mid = 3115
    global preferred_defn_late = 3016

    global niw_defn_early = 3715
    global niw_defn_mid = 3615
    global niw_defn_late = 3516

    global cmd_defn_late = 3916

    global sz_wlth_defn_early_v3 = 3207 // Equal returns spec
    global sz_wlth_defn_mid_v3 = 3107 // Equal returns spec
    global sz_wlth_defn_late_v3 = 3007 // Equal returns spec

end/*%>*/

capture program drop load_analysis_data/*%<*/
program define load_analysis_data

    if ("`1'" == "psz_top") {/*%<*/

        * Top 1% wealth share is on a different Excel sheet that top 0.1% and top 0.01%
        import excel using "$inputs/PSZ2017AppendixTablesII(Distrib).xlsx", ///
            sheet("TE2b") cellrange(A10) clear

        keep A B N
        rename (A B N) (year top10_pszes top1_pszes)
        drop if missing(year)

        tempfile top1_pszes
        save `top1_pszes'

        import excel using "$inputs/PSZ2017AppendixTablesII(Distrib).xlsx", ///
            sheet("TE2c") cellrange(A10) clear

        keep A H N
        rename (A H N) (year top01_pszes top001_pszes)
        drop if missing(year)

        merge 1:1 year using `top1_pszes', assert(3) nogen

        * No data after 2014
        assert missing(top10_pszes) & missing(top1_pszes) & missing(top01_pszes) & missing(top001_pszes) if year > 2014
        drop if year > 2014

        assert inrange(top10_pszes, 0, 1) & inrange(top1_pszes, 0, 1) & inrange(top01_pszes, 0, 1) & inrange(top001_pszes, 0, 1)
        foreach topgrp in top10 top1 top01 top001 {
            replace `topgrp'_pszes = `topgrp'_pszes * 100 
        }

    }/*%>*/
    if ("`1'" == "ks01") {/*%<*/
        import excel using ///
            "$inputs/AppendixTables(OtherEstimates).xlsx", ///
                sheet(TableC4) cellrange(A9:T111) clear

        keep A C D G H

        rename (A D H) (year top01_sz16 top01_ks)

        foreach wlthshr of varlist top* { 
            assert inrange(`wlthshr', 0, 1) if !missing(`wlthshr')
            assert missing(`wlthshr') if year > 2012
            replace `wlthshr' = 100 * `wlthshr'
        }

        keep if year <= 2012
    }/*%>*/
    if ("`1'" == "scfraw") {/*%<*/
        use $inputs/scfplus_slim.dta, clear

        keep year wgt networth
        cumul networth [aw = wgt], by(year) gen(wlthrank)

        gen top01 = cond(wlthrank > 0.999, 1, 0)
        gen networth_wgtd = networth * wgt

        collapse (sum) networth = networth_wgtd, by(year top01)

    }/*%>*/
    if ("`1'" == "szz") {/*%<*/
        load_preferred_taxdata, earlyspec($preferred_defn_early) ///
                                midspec($preferred_defn_mid) ///
                                latespec($preferred_defn_late)
    }/*%>*/
    if ("`1'" == "szz_tu") {/*%<*/
        load_preferred_taxdata, earlyspec($preferred_defn_early) ///
                                midspec($preferred_defn_mid) ///
                                latespec($preferred_defn_late) ///
                                units("tu")
    }/*%>*/
    if ("`1'" == "szz_niw") {/*%<*/
        load_taxdata, rankspec($niw_defn_late) year(2016)
    }/*%>*/
    if ("`1'" == "szz_cmd") {/*%<*/
        load_taxdata, rankspec($cmd_defn_late) year(2016)
    }/*%>*/
    if ("`1'" == "szz_agi") {/*%<*/
        use $inputs/fixinc_total_ranked_by_agi.dta, clear
    }/*%>*/
    if ("`1'" == "szz_forbes") {/*%<*/

        if ("`2'" == "") {
            use $inputs/forbes400_blending_m3115_l3016.dta, clear
        }
        else if ("`2'" == "3016") {
            use $inputs/forbes400_blending_m3115_l3016.dta, clear
        }
        else if ("`2'" == "3022") {
            use $inputs/forbes400_blending_m3122_l3022.dta, clear
        }
        else if ("`2'" == "3024") {
            use $inputs/forbes400_blending_m3124_l3024.dta, clear
        }
        else if ("`2'" == "3027") {
            use $inputs/forbes400_blending_m3124_l3027.dta, clear
        }
    }/*%>*/
    if ("`1'" == "sz") {/*%<*/
        load_taxdata, rankspec($sz_wlth_defn) startyr(1966) endyr(2016)
    }/*%>*/
    if ("`1'" == "sz_v3") {/*%<*/
        tempfile part
        load_taxdata, rankspec($sz_wlth_defn_early_v3) startyr(1966) endyr(1979)
        save `part'
        load_taxdata, rankspec($sz_wlth_defn_mid_v3) startyr(1980) endyr(2000)
        append using `part'
        save `part', replace
        load_taxdata, rankspec($sz_wlth_defn_late_v3) startyr(2001) endyr(2016)
        append using `part'

        gen hweal_szv3 = cond(year < 1980, hweal$sz_wlth_defn_early_v3, ///
                            cond(year >= 1980 & year < 2001, hweal$sz_wlth_defn_mid_v3, ///
                                hweal$sz_wlth_defn_late_v3))
    }/*%>*/
    if ("`1'" == "altspec") {/*%<*/

        local latespec `2'
        if ("`latespec'" == "3021") {
            local earlyspec = 3221
            local midspec = 3121
        }
        if ("`latespec'" == "3022") {
            local earlyspec = 3222
            local midspec = 3122
        }
        if ("`latespec'" == "3023") {
            local earlyspec = 3223
            local midspec = 3123
        }
        if ("`latespec'" == "3024") {
            local earlyspec = 3224
            local midspec = 3124
        }
        if ("`latespec'" == "3025") {
            local earlyspec = 3225
            local midspec = 3125
        }
        if ("`latespec'" == "3016") {
            local earlyspec = 3215
            local midspec = 3115
        }
        if ("`latespec'" == "3017") {
            local earlyspec = 3217
            local midspec = 3117
        }
        if ("`latespec'" == "3018") {
            local earlyspec = 3218
            local midspec = 3118
        }
        * Trimmed specs have same early and mid as progenitors
        if ("`latespec'" == "3026") {
            local earlyspec = 3215
            local midspec = 3115
        }
        if ("`latespec'" == "3027") {
            local earlyspec = 3224
            local midspec = 3124
        }
        tempfile part
        load_taxdata, rankspec(`earlyspec') startyr(1966) endyr(1979)
        save `part'
        load_taxdata, rankspec(`midspec') startyr(1980) endyr(2000)
        append using `part'
        save `part', replace
        load_taxdata, rankspec(`latespec') startyr(2001) endyr(2016)
        append using `part'

        gen hweal_alt = cond(year < 1980, hweal`earlyspec', ///
                            cond(year >= 1980 & year < 2001, hweal`midspec', ///
                                hweal`latespec'))
    }/*%>*/
    if ("`1'" == "sz2020_top") {/*%<*/

        tempfile t0001
        * Pulled from Distrib and Aggreg tables Feb 2022
        import delimit $inputs/sz2020_top0001.csv, clear
        keep year top0001_share
        rename top0001_share top0001_pszes20
        save `t0001'

        * Top 1% wealth share is on a different Excel sheet than top 0.1% and top 0.01%
        import excel using "$inputs/PSZ2020AppendixTablesII(Distrib).xlsx", ///
            sheet("TE2b") cellrange(A10) clear

        keep A B N
        rename (A B N) (year top10_pszes20 top1_pszes20)
        drop if missing(year)

        tempfile top1_pszes
        save `top1_pszes'

        import excel using "$inputs/PSZ2020AppendixTablesII(Distrib).xlsx", ///
            sheet("TE2c") cellrange(A10) clear

        keep A H N
        rename (A H N) (year top01_pszes20 top001_pszes20)
        drop if missing(year)

        merge 1:1 year using `top1_pszes', assert(3) nogen

        foreach topgrp in top10 top1 top01 top001 {
            replace `topgrp'_pszes20 = `topgrp'_pszes20 * 100 
        }

        merge 1:1 year using `t0001', keep(3) nogen
        
    }/*%>*/
    if ("`1'" == "sz2020_port") {/*%<*/

        * Pulled from Distrib tables Feb 2022
        import delimit $inputs/sz20_portfolios.csv, clear
        rename top1* *_top1
        rename top01* *_top01
        rename top001* *_top001
        
    }/*%>*/
    if ("`1'" == "natinc") {/*%<*/
        /* Storing it for replication purposes because FRED data can change

        freduse A032RC1A027NBEA, clear // This is nominal national income from the BEA
        gen year = substr(date, 1, 4)
        drop date*

        destring year, replace

        rename A032RC1A027NBEA Y // so it's more recognizable as national income

        replace Y = Y * 1E9

        sort year

        save natinc.dta, replace
        */
        use natinc.dta, clear
        keep if year <= 2019
        rename Y y
    }/*%>*/
    if ("`1'" == "parameters_older") {/*%<*/
        import delim $inputs/parameters.csv, varnames(1) clear
        
        rename yr year
        summ year, meanonly
        assert `r(max)' == 2015
        keep if inrange(year, 1966, 2015)
        
        gen ttbus = ttschcpartw + ttscorw
        gen ttpen = ttpenw + ttpeniraw
        gen tthou = ttrestw + ttmortw + ttrentw 

        gen ttfix = ttcurrency + ttinttaxw + ttintexmw 
    }/*%>*/
    if ("`1'" == "parameters_old") {/*%<*/
        import excel using $inputs/parameters.xlsx, firstrow clear

        keep if !missing(yr) 
        rename yr year
    }/*%>*/
    if ("`1'" == "parameters_new") {/*%<*/
        * Built in build_parameters.do file
        * Follows definitions from SZ with additional categories to account for
        * revisions and improved understanding of categories.

        insheet using "$inputs/parameters_wealth_expanded.csv", comma clear

        keep year ttdivw ttintexmw ttinttaxw ttmmbondfund ttmiscw ttschcpartw ttscorw ///
            ttrentw ttmortw ttrestw ttrentmortw ttpeniraw ttpenw ttpenw_incl_unfunded_db ttcurrency

        assert sign(ttmortw) == -1

        gen ttbus = ttschcpartw + ttscorw
        gen ttpen = ttpenw + ttpeniraw
        gen ttpen_preferred_check = ttpenw_incl_unfunded_db + ttpeniraw
        gen tthou = ttrestw + ttmortw + ttrentw 

        gen ttfix = ttcurrency + ttinttaxw + ttintexmw + ttmmbondfund

    }/*%>*/
    if ("`1'" == "deposit_rates") {/*%<*/
        insheet using $inputs/deprate_excl_checking.csv, clear

        * Data are quarterly, underlying are weighted means. See Savov email and
        * original spreadsheet.
        collapse (mean) r_deposit = deprate_excl_checking, by(year)
        replace r_deposit = r_deposit * 100
        sort year

    }/*%>*/
    if ("`1'" == "fred_rates") {/*%<*/
        /*
        freduse AAA BAA DGS10 GS5 BAMLC0A4CBBBEY, clear

        gen year = substr(date, 1, 4)
        destring year, replace

        collapse (mean) r_aaa = AAA r_baa = BAA r_ust5 = GS5 r_ust10 = DGS10 r_bbb = BAMLC0A4CBBBEY, by(year)
        save "fred_rates.dta", replace
        */

        use "fred_rates.dta", clear
    }/*%>*/
    if ("`1'" == "usfa") {/*%<*/
        import delim "$inputs/l223.csv", clear varnames(1)

        keep if regexm(date, ":Q4")
        gen year = real(substr(date, 1, 4))
        isid year

        destring lm883164135q, replace ignore("ND")

        * Do mid-year averaging following SZ 2020
        expand 2
        sort year
        by year: gen midyear = cond(_n == 1, year, year + 1)

        * Drop years for which we can't get midyear averages
        foreach bookend of numlist 1945 2020 {
            qui count if midyear == `bookend'
            assert `r(N)' == 1
        }
        qui drop if inlist(midyear, 1945, 2020)

        collapse (mean) privccorw_usfa = lm883164135q, by(midyear)

        rename midyear year
    }/*%>*/
    if ("`1'" == "cpi") {/*%<*/

        freduse CPIAUCSL, clear

        gen year = yofd(daten)

        collapse (mean) CPIAUCSL, by(year)

        sort year
        count if year <= 2019
        local row2019 = `r(N)'
        assert year[`row2019'] == 2019

        count if year <= 2016
        local row2016 = `r(N)'
        assert year[`row2016'] == 2016

        gen adjfactor2016 = CPIAUCSL[`row2016'] / CPIAUCSL
        gen adjfactor2019 = CPIAUCSL[`row2019'] / CPIAUCSL

        save "cpi.dta", replace

        use "cpi.dta", clear
    }/*%>*/
    if ("`1'" == "dfaraw") {/*%<*/
        
        import delim "$inputs/dfa-networth-levels-detail.csv", clear

        gen year = real(substr(date, 1, 4)) // Date has quarters
        assert !missing(year) & inrange(year, 1989, 2020)

    }/*%>*/
    if ("`1'" == "forbes") {/*%<*/
        use $inputs/forbeswlth.dta, clear
    }/*%>*/
    if ("`1'" == "family") {/*%<*/
        load_analysis_data scfrevision

        gen privcorp = privccorw + scorw

        assert othdebt <= 0
        replace hwoth = hwoth + othdebt
        gen pen_oth = hwpen + hwoth

        ds hwequ hwfix hwbus hwpen hwhou hwoth pen_oth ccorw pthru privccorw privcorp
        local othersplitvars "`r(varlist)'"

        es_rank_scf, rankvar(networth_pref) othersplitvars("`othersplitvars'")

        tempfile orig 
        save `orig'

        foreach wlthgrp in top001 {

            use `orig', clear

            #delimit ;
            local threshold = cond("`wlthgrp'" == "top1", 0.99, 
                              cond("`wlthgrp'" == "top01", 0.999, 
                              cond("`wlthgrp'" == "top001", 0.9999, 0.99999)));
            #delimit cr

            keep if es_rank >= `threshold' & year == 2016

            egen networth_check1 = rowtotal(hwequ hwfix hwbus hwhou pen_oth)
            egen networth_check2 = rowtotal(ccorw hwfix pthru hwhou pen_oth)
            assert inrange(networth_pref / networth_check1, 0.999, 1.001) & ///
                inrange(networth_pref / networth_check2, 0.999, 1.001)

            collapse (sum) hweal = networth_pref `othersplitvars' [fw = wgt1B]

            assert _N == 1

            foreach billionX of varlist hweal `othersplitvars' {
                replace `billionX' = `billionX' / 1E9
            }

            if "`wlthgrp'" == "top001" {
                foreach sharevar of varlist hwbus hwequ hwfix hwhou pen_oth {
                    local `sharevar'_sh_`wlthgrp' = (`sharevar' / hweal)
                    assert inrange(``sharevar'_sh_`wlthgrp'', 0, 1)
                }

                local privccorw_sh_hwbus = privccorw / hwbus 
                local ccorw_sh_privcorp = privccorw / privcorp 

                assert inrange(`privccorw_sh_hwbus', 0, 1) & inrange(`ccorw_sh_privcorp', 0, 1)
            }
        }

        clear
        set obs 1

        /***********************************************************************
            Allocate net worth share to operating business; see page 15
                ``Family's stake in the operating business.'' 52% of family 
                wealth invested in operating business. 23% complete control of 
                operating business, 50% majority, 11% minority with control, 16% 
                minority without control. Conservatively, say that 73%(=23 + 50) 
                of operating biz are private and 27%(=11 + 16) public.
        ***********************************************************************/

        gen operatingbiz = 52
        gen operatingbiz_pvt = (0.23 + 0.5) * 52
        gen operatingbiz_pvtccorw = operatingbiz_pvt * `privccorw_sh_hwbus'
        gen operatingbiz_pub = (0.11 + 0.16) * 52

        /*******************************************************************
            Enter shares directly from graphic, which covers
                portfolio shares for assetes under management (not operating 
                biz). These are copied directly from page 17.
        ********************************************************************/

        * ``Developed market'' + ``Developing market''
        gen equity = (23 + 6) * ((100 - operatingbiz) / 100)

        /* ``Direct venture capital/private equity'' + ``Private equity funds'' + 
            ``Coinvesting'' */
        gen privateequity = (12 + 8 + 2) * ((100 - operatingbiz) / 100)

        gen realestate = 13 * ((100 - operatingbiz) / 100)
        gen hedgefunds = 9 * ((100 - operatingbiz) / 100)
        gen otherassets = 3 * ((100 - operatingbiz) / 100)
        gen etfs = 1 * ((100 - operatingbiz) / 100)
        gen reits = 1 * ((100 - operatingbiz) / 100)
        gen tangibles = 1 * ((100 - operatingbiz) / 100)
        gen agriculture = 1 * ((100 - operatingbiz) / 100)
        gen commodities = 1 * ((100 - operatingbiz) / 100)
        gen cashorequivalent = 6 * ((100 - operatingbiz) / 100)

        * ``Developed market'' + ``Developing market''
        gen fixedincome = (9 + 1) * ((100 - operatingbiz) / 100)

        /*******************************************************************
            Copy hedge funds breakdown from page 33: credit and 
                distressed strategies are fixed income investments, 
                everything else should be equity.
        ********************************************************************/

        gen fixshare_hedgefunds = 0.11 + 0.05
        gen equshare_hedgefunds = 1 - fixshare_hedgefunds

        /*******************************************************************
            Allocate to risk-based asset pricing categories
        ********************************************************************/

        gen hwfix_sh = fixedincome + (0.5 * etfs) + (fixshare_hedgefunds * hedgefunds) + ///
                       cashorequivalent

        gen hwequ_sh = operatingbiz_pub + equity + (0.5 * etfs) + (equshare_hedgefunds * hedgefunds)
        gen hwbus_sh = operatingbiz_pvt + privateequity + agriculture
        gen hwhou_sh = realestate + reits
        gen hwoth_sh = otherassets + tangibles + commodities

        gen ccorw_sh = hwequ_sh + operatingbiz_pvtccorw + privateequity
        gen pthru_sh = operatingbiz_pvt - operatingbiz_pvtccorw + agriculture

        
        egen total_shares = rowtotal(hw*)
        gen total_shares_check = hwfix_sh + ccorw_sh + pthru_sh + hwhou_sh + hwoth_sh

        assert abs(total_shares - total_shares_check) < 0.1

        assert total_shares < 100

        foreach sharevar of varlist hw* ccorw_sh pthru_sh {
            replace `sharevar' = (`sharevar' / total_shares) * 100
        }
        drop total_shares

        * No pension assets, so just ``Pension and other'' is just ``Other''
        rename hwoth_sh pen_oth_sh

    }/*%>*/
    if ("`1'" == "estatecross_mgs") {/*%<*/
        load_analysis_data scfrevision

        gen privcorp = privccorw + scorw

        assert othdebt <= 0
        replace hwoth = hwoth + othdebt
        gen pen_oth = hwpen + hwoth

        ds hwequ hwfix hwbus hwpen hwhou hwoth pen_oth ccorw pthru privccorw privcorp
        local othersplitvars "`r(varlist)'"

        es_rank_scf, rankvar(networth_pref) othersplitvars("`othersplitvars'")

        tempfile orig 
        save `orig'

        foreach wlthgrp in top001 {

            use `orig', clear

            #delimit ;
            local threshold = cond("`wlthgrp'" == "top1", 0.99, 
                              cond("`wlthgrp'" == "top01", 0.999, 
                              cond("`wlthgrp'" == "top001", 0.9999, 0.99999)));
            #delimit cr

            keep if es_rank >= `threshold' & year == 2016

            egen networth_check1 = rowtotal(hwequ hwfix hwbus hwhou pen_oth)
            egen networth_check2 = rowtotal(ccorw hwfix pthru hwhou pen_oth)
            assert inrange(networth_pref / networth_check1, 0.999, 1.001) & ///
                inrange(networth_pref / networth_check2, 0.999, 1.001)

            collapse (sum) hweal = networth_pref `othersplitvars' [fw = wgt1B]

            assert _N == 1

            foreach billionX of varlist hweal `othersplitvars' {
                replace `billionX' = `billionX' / 1E9
            }

            if "`wlthgrp'" == "top001" {
                foreach sharevar of varlist hwbus hwequ hwfix hwhou pen_oth {
                    local `sharevar'_sh_`wlthgrp' = (`sharevar' / hweal)
                    assert inrange(``sharevar'_sh_`wlthgrp'', 0, 1)
                }

                local privccorw_sh_hwbus = privccorw / hwbus 
                local ccorw_sh_privcorp = privccorw / privcorp 

                assert inrange(`privccorw_sh_hwbus', 0, 1) & inrange(`ccorw_sh_privcorp', 0, 1)
            }
        }
        import delim using $inputs/fig_13v3.csv, clear

        gen ccorw_sh = share_public_stock + (`ccorw_sh_privcorp' * share_closelyheld_stock) + ///
                       share_pe_hedgefunds

        gen pthru_sh = ((1 - `ccorw_sh_privcorp') * share_closelyheld_stock) + ///
                       share_realestate_partnerships + share_other_ltd_partnerships + ///
                       share_noncorporate_bizassets

        rename (share_publicequity share_fixed_income share_net_housing ///
                share_pension share_privatebiz share_other) ///
            (hwequ_sh hwfix_sh hwhou_sh hwpen_sh hwbus_sh hwoth_sh)

    }/*%>*/

    if ("`1'" == "fiscalflows") {/*%<*/

        load_analysis_data fiscalflowsraw

        /***************************************************************************
            Keep only (1) interest income (2) real estate taxes (3) dividend
                income including estate income (4) capital gains income including
                estate income (5) S-corp and partnership income (6) sole-
                proprietorship income (7) wage income and (8) pension income.
        ***************************************************************************/

        keep year group fixinc realestatetax dividendinc capitalginc scorppship /// 
		schcincp wagind peninc

        * DONE fix
        *gen fiwag_only = fiwag - peninc

        /***************************************************************************
            Cycling through income category, compute shares of income accruing 
                to each of five fiscal income groups: top 10%, 1%, 0.1%, and 0.01%.
        ***************************************************************************/

        bysort year: assert group[1] == "all" // Ensure this is total income for each year

        ds year group, not
        local inccat_list = "`r(varlist)'"

        foreach inccat in `inccat_list' {
            bysort year: gen sh_`inccat' = (`inccat' / `inccat'[1]) * 100
            drop `inccat'
        }

        drop if group == "all" // This will mechanically be 100% for each column

        /***************************************************************************
            Reshape wide so that each income group and income category has its  
                own variable (e.g. we have sh_fixinc_top10, etc.)
        ***************************************************************************/

        reshape wide sh_*, i(year) j(group, string)
    }/*%>*/
    if ("`1'" == "scfcross") {/*%<*/

        /*******************************************************************
            Load data; keep and delineate portfolio categories
        *******************************************************************/
            
        use $inputs/scf_revision.dta, clear
        assert sign(othdebt) == -1 | othdebt == 0
        replace hwoth = hwoth + othdebt

        gen pen_hou_oth = hwhou + hwpen + hwoth

        keep networth_pref year wgt married hwfix hwequ privccorw ccorw hwbus pthru ///
            hwhou hwpen hwoth pen_hou_oth 
        /*gen ccorw = stocks + privccorw + stmutf + (0.5 * comutf) + trusts_equity + (0.5 * omutf)

        #delimit ;
        keep networth_pref year wgt married hwfix hwequ hwbus hwpen hwhou hwoth pen_hou_oth ccorw pthru
            mmda saving call cds savbnd bond notxbnd privloans mortgageassets inttaxw 
            othdebt hwfix currency *mutf mmmf intexmw trusts_fixed_sz trusts_inttaxw
            trusts_intexmw trusts_mmbondfund;
        #delimit cr
        */

        /*******************************************************************
            Implement equal-split ranking adjustment using
                program and check that portfolio categories still add up to
                total preferred net worth
        *******************************************************************/

        ds year wgt married networth_pref privccorw, not
        local portfoliovars = "`r(varlist)'"

        es_rank_scf, rankvar(networth_pref) othersplitvars("`portfoliovars' privccorw")

        egen hweal_chk1 = rowtotal(hwfix hwequ hwbus pen_hou_oth)
        assert inrange(networth_pref / hweal_chk1, 0.999, 1.001) | ///
            abs(networth_pref - hweal_chk1) < 2

        egen hweal_chk2 = rowtotal(hwfix ccorw pthru pen_hou_oth)
        assert inrange(networth_pref / hweal_chk2, 0.999, 1.001) | ///
            abs(networth_pref - hweal_chk2) < 2
        drop hweal_chk?

        /*******************************************************************
            Create groups within top decile
        *******************************************************************/

        egen grp = cut(es_rank), at(0 0.9(.01)0.99 0.999 0.9999 1.01) icodes

        assert !missing(grp)

        qui summ grp, meanonly
        local topgrpid = `r(max)'
        assert `topgrpid' == 12

        foreach unweighted of varlist networth_pref `portfoliovars' privccorw {
            qui replace `unweighted' = `unweighted' * wgt
        }

        /*******************************************************************
            Collapse to yield totals by year and group; create 
                aggregate weaelth concept
        *******************************************************************/

        collapse (sum) networth_pref `portfoliovars' privccorw, by(year grp)

        sort year
        by year: egen agg_wealth = total(networth_pref)

        reshape wide networth_pref `portfoliovars' privccorw, i(year) j(grp)

        ds networth_pref*
        egen agg_wealth_check = rowtotal(networth_pref*)

        assert inrange(agg_wealth / agg_wealth_check, 0.9999, 1.0001)
        drop agg_wealth_check

        /*******************************************************************
            Get 2016 portfolio shares among the top 0.01%, which
                we'll use to get Forbes 400 wealth in a subsequent section
        *******************************************************************/

        sort year
        qui count if year <= 2016
        local row2016 = `r(N)'
        assert year[`row2016'] == 2016

        local privccorw_sh_hwbus = privccorw12[`row2016'] / hwbus12[`row2016']
        assert inrange(`privccorw_sh_hwbus', 0, 1)

        * Retrieve portfolio shares of top 0.01% in SCF
        foreach portfoliocat in `portfoliovars' {
            local portfoliocatname = subinstr("`portfoliocat'", "12", "", 1) + "_scf"

            local `portfoliocat'top001sh = `portfoliocat'12[`row2016'] / networth_pref12[`row2016']
            di "`portfoliocat'top001sh: " ``portfoliocat'top001sh'
        }

        local pubprivequshare = `hwequtop001sh' + `hwbustop001sh'
        local pubprivequshare_check = `ccorwtop001sh' + `pthrutop001sh'
        assert abs(`pubprivequshare' - `pubprivequshare_check') < 0.00001

        /*******************************************************************
            Reshape long again and compress tippy-dippy top group
                into top 0.1%; then rename columns and save as tempfile
        *******************************************************************/
        reshape long networth_pref `portfoliovars' privccorw, i(year) j(grp)
       

        collapse (sum) networth_pref `portfoliovars' privccorw (firstnm) agg_wealth, by(grp year)
            
        rename (networth_pref agg_wealth hw* ccorw privccorw pthru pen_hou_oth) ///
            (hweal_scf hweal_agg_scf hw*_scf ccorw_scf privccorw_scf pthru_scf pen_hou_oth_scf)

        * Keep some share variables for Forbes add on
        foreach portfoliocat in `portfoliovars' {
            gen `portfoliocat'top001sh = ``portfoliocat'top001sh'
        }
        gen pubprivequshare = `hwequtop001sh' + `hwbustop001sh'
        gen privccorw_sh_hwbus = `privccorw_sh_hwbus'

    }/*%>*/
    if ("`1'" == "scfcross_v3") {/*%<*/

        /*******************************************************************
            Load data; keep and delineate portfolio categories
        *******************************************************************/
            
        use $inputs/scf_revision.dta, clear
        assert sign(othdebt) == -1 | othdebt == 0
        replace hwoth = hwoth + othdebt

        * Pension and vehicle adjustments
        drop networth_pref
        gen networth_pref = networth + funded_pen_db - vehic - durables
        replace hwpen = hwpen - tot_pen_db + funded_pen_db
        replace hwoth = hwoth - veh_inst

        gen pen_hou_oth = hwhou + hwpen + hwoth

        #delimit ;
        keep networth_pref year wgt married hwfix hwequ hwbus hwpen hwhou hwoth pen_hou_oth ccorw pthru
            mmda saving call cds savbnd bond notxbnd privloans mortgageassets inttaxw 
            othdebt hwfix currency *mutf mmmf intexmw trusts_fixed_sz trusts_inttaxw
            trusts_intexmw trusts_mmbondfund privccorw;
        #delimit cr

        /*******************************************************************
            Implement equal-split ranking adjustment using
                program and check that portfolio categories still add up to
                total preferred net worth
        *******************************************************************/

        ds year wgt married networth_pref privccorw, not
        local portfoliovars = "`r(varlist)'"

        es_rank_scf, rankvar(networth_pref) othersplitvars("`portfoliovars' privccorw")

        egen hweal_chk1 = rowtotal(hwfix hwequ hwbus pen_hou_oth)
        assert inrange(networth_pref / hweal_chk1, 0.999, 1.001) | ///
            abs(networth_pref - hweal_chk1) < 2

        egen hweal_chk2 = rowtotal(hwfix ccorw pthru pen_hou_oth)
        assert inrange(networth_pref / hweal_chk2, 0.999, 1.001) | ///
            abs(networth_pref - hweal_chk2) < 2
        drop hweal_chk?

        /*******************************************************************
            Create groups within top decile
        *******************************************************************/

        egen grp = cut(es_rank), at(0 0.9(.01)0.99 0.999 0.9999 1.01) icodes

        assert !missing(grp)

        qui summ grp, meanonly
        local topgrpid = `r(max)'
        assert `topgrpid' == 12

        foreach unweighted of varlist networth_pref `portfoliovars' privccorw {
            qui replace `unweighted' = `unweighted' * wgt
        }

        /*******************************************************************
            Collapse to yield totals by year and group; create 
                aggregate weaelth concept
        *******************************************************************/

        collapse (sum) networth_pref `portfoliovars' privccorw, by(year grp)

        sort year
        by year: egen agg_wealth = total(networth_pref)

        reshape wide networth_pref `portfoliovars' privccorw, i(year) j(grp)

        ds networth_pref*
        egen agg_wealth_check = rowtotal(networth_pref*)

        assert inrange(agg_wealth / agg_wealth_check, 0.9999, 1.0001)
        drop agg_wealth_check

        /*******************************************************************
            Get 2016 portfolio shares among the top 0.01%, which
                we'll use to get Forbes 400 wealth in section 2.1.2
        *******************************************************************/

        sort year
        qui count if year <= 2016
        local row2016 = `r(N)'
        assert year[`row2016'] == 2016

        local privccorw_sh_hwbus = privccorw12[`row2016'] / hwbus12[`row2016']
        assert inrange(`privccorw_sh_hwbus', 0, 1)

        * Retrieve portfolio shares of top 0.01% in SCF
        foreach portfoliocat in `portfoliovars' {
            local portfoliocatname = subinstr("`portfoliocat'", "12", "", 1) + "_scf"

            local `portfoliocat'top001sh = `portfoliocat'12[`row2016'] / networth_pref12[`row2016']
            di "`portfoliocat'top001sh: " ``portfoliocat'top001sh'
        }

        local pubprivequshare = `hwequtop001sh' + `hwbustop001sh'
        local pubprivequshare_check = `ccorwtop001sh' + `pthrutop001sh'
        assert abs(`pubprivequshare' - `pubprivequshare_check') < 0.00001

        /*******************************************************************
            Reshape long again and compress tippy-dippy top group
                into top 0.1%; then rename columns and save as tempfile
        *******************************************************************/
        reshape long networth_pref `portfoliovars' privccorw, i(year) j(grp)
       
        * Don't need separate top 0.01% anymore; just for F400 portfolio
        *replace grp = 11 if grp == 12 
        *drop if grp == 0 // Don't need bottom 90% either

        collapse (sum) networth_pref `portfoliovars' privccorw (firstnm) agg_wealth, by(grp year)
            
        rename (networth_pref agg_wealth hw* ccorw privccorw pthru pen_hou_oth) ///
            (hweal_scf hweal_agg_scf hw*_scf ccorw_scf privccorw_scf pthru_scf pen_hou_oth_scf)

        * Keep some share variables for Forbes add on
        foreach portfoliocat in `portfoliovars' {
            gen `portfoliocat'top001sh = ``portfoliocat'top001sh'
        }
        gen pubprivequshare = `hwequtop001sh' + `hwbustop001sh'
        gen privccorw_sh_hwbus = `privccorw_sh_hwbus'

    }/*%>*/
    if ("`1'" == "scfcross_v3_w400") {/*%<*/
        load_analysis_data scfcross_v3
        tempfile scf
        save `scf'

        local portfoliovars = "hwfix hwequ ccorw hwbus pthru hwhou hwpen hwoth pen_hou_oth"

        foreach portfoliocat in ccorw pthru hwfix hwpen hwhou hwoth pen_hou_oth  {
            sum `portfoliocat'top001sh, meanonly
            local `portfoliocat'top001sh = `r(mean)'
        }
        sum pubprivequshare, meanonly
        local pubprivequshare = `r(mean)'

        sum privccorw_sh_hwbus, meanonly
        local privccorw_sh_hwbus = `r(mean)'

        use $inputs/forbes_2016_pubpriv_dsupdate.dta, clear

        assert forbes_yr == 2016
        keep net_worthmillions public_company forbes_yr
        rename forbes_yr year

        drop if missing(net_worthmillions) 
        assert _N == 400

        gen hweal_f400 = net_worthmillions * 1E6 // Scale up into dollars

        foreach portfoliocat in hwfix hwpen hwhou hwoth pen_hou_oth {

            gen `portfoliocat'_f400 = ``portfoliocat'top001sh' * hweal_f400
        }

        gen hwequ_f400 = cond(public_company == 1, hweal_f400 * `pubprivequshare', 0)
        gen hwbus_f400 = cond(public_company == 0, hweal_f400 * `pubprivequshare', 0)

        gen ccorw_f400 = cond(public_company == 0, hweal * `pubprivequshare' * `privccorw_sh_hwbus', ///
                              hweal_f400 * `pubprivequshare')
        gen pthru_f400 = cond(public_company == 0, hweal * `pubprivequshare' * (1 - `privccorw_sh_hwbus'), 0)

        collapse (sum) *_f400

        foreach variable of varlist * {
            global `variable' = `variable'[1]
            di "`variable' (billions): " ${`variable'} / 1E9
        }

        local hweal_f400_check = $hwfix_f400 + $hwequ_f400 + $hwbus_f400 + $pen_hou_oth_f400

        assert inrange(`hweal_f400_check' / $hweal_f400, 0.999, 1.001) 

        export delimited using forbesportfolios.csv, replace

        /***********************************************************************
            Load SCF tempfile from 2.1 and compute shares of aggregates 
                with and without
        ***********************************************************************/
        
        use `scf', clear
        keep if year == 2016

        gen hweal_agg_scf_f400 = hweal_agg_scf + $hweal_f400

        foreach portfoliocat in hweal `portfoliovars' privccorw {
            gen `portfoliocat'_scfAsh = (`portfoliocat'_scf / hweal_agg_scf) * 100
            
            if "`portfoliocat'" != "privccorw" {    
                gen `portfoliocat'_scf_f400 = cond(grp < 12, `portfoliocat'_scf, ///
                                                    `portfoliocat'_scf + ${`portfoliocat'_f400})  

                gen `portfoliocat'_scf_f400Ash = (`portfoliocat'_scf_f400 / hweal_agg_scf_f400) * 100
            }
        }

        #delimit ;
        gen hweal_scfAsh_check = hwfix_scfAsh + hwequ_scfAsh + hwbus_scfAsh + 
                                 pen_hou_oth_scfAsh;
        assert abs(hweal_scfAsh_check - hweal_scfAsh) < 0.01;

        gen hweal_scfAsh_check2 = hwfix_scfAsh + ccorw_scfAsh + pthru_scfAsh + 
                                  pen_hou_oth_scfAsh;
        assert abs(hweal_scfAsh_check2 - hweal_scfAsh) < 0.01;

        gen hweal_scf_f400Ash_check = hwfix_scf_f400Ash + hwequ_scf_f400Ash +
                                      hwbus_scf_f400Ash + pen_hou_oth_scf_f400Ash;
        assert abs(hweal_scf_f400Ash_check - hweal_scf_f400Ash) < 0.01;

        gen hweal_scf_f400Ash_check2 = hwfix_scf_f400Ash + ccorw_scf_f400Ash +
                                       pthru_scf_f400Ash + pen_hou_oth_scf_f400Ash;
        assert abs(hweal_scf_f400Ash_check2 - hweal_scf_f400Ash) < 0.01;
        #delimit cr

        keep year grp *Ash

    }/*%>*/
    if ("`1'" == "dfacross") {/*%<*/
        use $inputs/dfa_revision.dta, replace

        keep if year == 2016

        assert sign(othdebt_dfa) == -1
        replace hwoth_dfa = hwoth_dfa + othdebt_dfa

        gen pen_hou_oth_dfa = hwpen_dfa + hwhou_dfa + hwoth_dfa

        egen hweal_chk1 = rowtotal(hwfix_dfa hwequ_dfa hwbus_dfa pen_hou_oth_dfa)
        egen hweal_chk2 = rowtotal(hwfix_dfa ccorw_dfa pthru_dfa pen_hou_oth_dfa)

        assert inrange(hweal_chk1 / networth_pref, 0.999, 1.001) & ///
               inrange(hweal_chk2 / networth_pref, 0.999, 1.001)

        keep category networth_pref hw*_dfa ccorw_dfa pthru_dfa pen_hou_oth_dfa

        qui summ networth_pref, meanonly
        local ttwealth_dfa = `r(sum)'

        drop if inlist(category, "Bottom50", "Next40") // Interested in top groups

        foreach portfoliocat of varlist *_dfa {
            gen `portfoliocat'Ash = (`portfoliocat' / `ttwealth_dfa') * 100 
        
            replace `portfoliocat'Ash = cond(category == "Next9", `portfoliocat'Ash / 9, ///
                                            `portfoliocat'Ash / 3) 
        }

        expand 9 if category == "Next9"
        expand 4 if category == "Top1"

        sort category
        assert category == "Next9" if inrange(_n, 1, 9)
        assert category == "Top1" if inlist(_n, 10, 13)

        gen grp = cond(category == "Next9", _n, _n - 1)
    }/*%>*/
    if ("`1'" == "scfharmony") {/*%<*/
        use $inputs/scf_revision.dta, clear
        keep year wgt networth_pref tot_pen_db vehic veh_inst married

        es_rank_scf, rankvar(networth_pref) othersplitvars("`portfoliovars'")
        /* Group 1 = bottom 99%, group 2 = P99-99.9, group 3 = P99.9-99.99, 
            group 4 = top 0.01% */
        #delimit ;
        gen wlthgrp = cond(es_rank > 0.9999, 5,
                          cond(es_rank > 0.999, 4,
                          cond(es_rank > 0.99, 3, 
                          cond(es_rank > 0.9, 2, 1))));
        #delimit cr
        gen networth_pref_wgtd = networth_pref * wgt
        collapse (sum) networth_pref = networth_pref_wgtd, by(year wlthgrp)

        reshape wide networth_pref, i(year) j(wlthgrp)

        /* Create mutually inclusive top 0.1% and top 1% concepts from mutually 
            exclusive groups */
        gen networth_pref_top01 = networth_pref4 + networth_pref5
        gen networth_pref_top1 = networth_pref3 + networth_pref_top01
        gen networth_pref_top10 = networth_pref2 + networth_pref_top1
        
        * Create totals and check that totals are consistent
        egen total_wealth = rowtotal(networth_pref?)
        gen total_wealth_check = networth_pref1 + networth_pref_top10

        assert inrange(total_wealth / total_wealth_check, 0.999, 1.001)

        rename networth_pref5 networth_pref_top001

        * Add Forbes wealth
        merge 1:1 year using $inputs/forbeswlth.dta, keepusing(forbeswlth) assert(3) nogen

        foreach addforbes of varlist networth_pref_top* total_wealth {
            replace `addforbes' = `addforbes' + forbeswlth
        }
        
        foreach topgrp in top10 top1 top01 top001 {
            gen `topgrp'_scfpref = (networth_pref_`topgrp' / total_wealth) * 100
            assert inrange(`topgrp'_scfpref, 0, 100)
        }

    }/*%>*/
    if ("`1'" == "scfharmony_v3_forbes") {/*%<*/
        use $inputs/scf_revision.dta, clear
        keep year wgt networth tot_pen_db vehic veh_inst durables married funded_pen_db
        gen networth_pref = networth + funded_pen_db - vehic - durables

        es_rank_scf, rankvar(networth_pref) othersplitvars("`portfoliovars'")
        /* Group 1 = bottom 99%, group 2 = P99-99.9, group 3 = P99.9-99.99, 
            group 4 = top 0.01% */
        #delimit ;
        gen wlthgrp = cond(es_rank > 0.99999, 6, 
                          cond(es_rank > 0.9999, 5,
                          cond(es_rank > 0.999, 4,
                          cond(es_rank > 0.99, 3, 
                          cond(es_rank > 0.9, 2, 1)))));
        #delimit cr
        gen networth_pref_wgtd = networth_pref * wgt
        collapse (sum) networth_pref = networth_pref_wgtd, by(year wlthgrp)

        reshape wide networth_pref, i(year) j(wlthgrp)

        /* Create mutually inclusive top 0.1% and top 1% concepts from mutually 
            exclusive groups */
        gen networth_pref_top001 = networth_pref5 + networth_pref6
        gen networth_pref_top01 = networth_pref4 + networth_pref_top001
        gen networth_pref_top1 = networth_pref3 + networth_pref_top01
        gen networth_pref_top10 = networth_pref2 + networth_pref_top1
        
        * Create totals and check that totals are consistent
        egen total_wealth = rowtotal(networth_pref?)
        gen total_wealth_check = networth_pref1 + networth_pref_top10

        assert inrange(total_wealth / total_wealth_check, 0.999, 1.001)

        rename networth_pref6 networth_pref_top0001

        * Add Forbes wealth
        merge 1:1 year using $inputs/forbeswlth.dta, keepusing(forbeswlth) assert(3) nogen

        foreach addforbes of varlist networth_pref_top* total_wealth {
            replace `addforbes' = `addforbes' + forbeswlth
        }

        foreach topgrp in top10 top1 top01 top001 top0001 {
            gen `topgrp'_scfpref_forbes = (networth_pref_`topgrp' / total_wealth) * 100
            assert inrange(`topgrp'_scfpref_forbes, 0, 100)
        }

    }/*%>*/
    if ("`1'" == "scfharmony_v3") {/*%<*/
        use $inputs/scf_revision.dta, clear
        keep year wgt networth tot_pen_db vehic veh_inst durables married funded_pen_db
        gen networth_pref = networth + funded_pen_db - vehic - durables

        es_rank_scf, rankvar(networth_pref) othersplitvars("`portfoliovars'")
        /* Group 1 = bottom 99%, group 2 = P99-99.9, group 3 = P99.9-99.99, 
            group 4 = top 0.01% */
        #delimit ;
        gen wlthgrp = cond(es_rank > 0.99999, 6, 
                          cond(es_rank > 0.9999, 5,
                          cond(es_rank > 0.999, 4,
                          cond(es_rank > 0.99, 3, 
                          cond(es_rank > 0.9, 2, 1)))));
        #delimit cr
        gen networth_pref_wgtd = networth_pref * wgt
        collapse (sum) networth_pref = networth_pref_wgtd, by(year wlthgrp)

        reshape wide networth_pref, i(year) j(wlthgrp)

        /* Create mutually inclusive top 0.1% and top 1% concepts from mutually 
            exclusive groups */
        gen networth_pref_top001 = networth_pref5 + networth_pref6
        gen networth_pref_top01 = networth_pref4 + networth_pref_top001
        gen networth_pref_top1 = networth_pref3 + networth_pref_top01
        gen networth_pref_top10 = networth_pref2 + networth_pref_top1
        
        * Create totals and check that totals are consistent
        egen total_wealth = rowtotal(networth_pref?)
        gen total_wealth_check = networth_pref1 + networth_pref_top10

        assert inrange(total_wealth / total_wealth_check, 0.999, 1.001)

        rename networth_pref6 networth_pref_top0001

        foreach topgrp in top10 top1 top01 top001 top0001 {
            gen `topgrp'_scfpref = (networth_pref_`topgrp' / total_wealth) * 100
            assert inrange(`topgrp'_scfpref, 0, 100)
        }

        * Add Forbes wealth (not folding in right now)
        merge 1:1 year using $inputs/forbeswlth.dta, keepusing(forbeswlth) assert(3) nogen
    }/*%>*/
    if ("`1'" == "bank_rates") {/*%<*/
        /***********************************************************************
            Get bank rates by non-interest, non-mutual fund wealth 
                groups: bottom 90%, P90-99, P99-99.9, and top 0.1%
        ***********************************************************************/
        use $inputs/info_returns_rates.dta, clear

        keep if !missing(rank_nint_nmuf_wealth)
        assert missing(radjgross) & !missing(implied_bank_rate)

        replace rank_nint_nmuf_wealth = rank_nint_nmuf_wealth * 100

        gen grp = cond(rank_nint_nmuf_wealth >= 99, rank_nint_nmuf_wealth, ///
                  cond(rank_nint_nmuf_wealth >= 90, 90, 0))

        collapse (mean) r = implied_bank_rate, by(year grp)

        tostring grp, replace force usedisplayformat
        gen whichsource = "bank"
    }/*%>*/
    if ("`1'" == "boutique_rates") {/*%<*/
        /***********************************************************************
            Get boutique rates by AGI group; also get loan and savings 
                bond rates, which are constant 
        ***********************************************************************/
        use $inputs/info_returns_rates.dta, clear

        keep if !missing(radjgross)
        assert missing(rank_nint_nmuf_wealth) & !missing(rate_boutique) & ///
            !missing(rate_loan) & !missing(savings_bond_rate)

        sort year radjgross
        by year: assert rate_loan[1] == rate_loan[_N]
        by year: assert savings_bond_rate[1] == savings_bond_rate[_N]
        by year: assert rate_nonqualdivs[1] == rate_nonqualdivs[_N]

        gen grp = cond(radjgross >= 99, radjgross, cond(radjgross >= 90, 90, 0))

        collapse (firstnm) r_loan = rate_loan r_savbnd = savings_bond_rate ///
            (mean) r_boutique = rate_boutique (mean) r_nonqual = rate_nonqualdivs, by(year grp)

        reshape long r_, i(year grp) j(whichsource, string)
        rename r_ r

        tostring grp, replace force usedisplayformat

        replace grp = "" if inlist(whichsource, "savbnd", "loan", "nonqual")
        duplicates drop year grp whichsource, force
    }/*%>*/
   if ("`1'" == "interest_info") {/*%<*/
        tempfile info
        forv yr = 2001/2016 {
            insheet using "$inputs/`yr'/intinfo_gcollapse_Radjgross_20201117.csv", clear
            gen year = `yr'
            if (`yr' == 2001) {
                save `info'
            }
            else {
                append using `info'
                save `info', replace
            }
        }

        tempfile nqd
        insheet using "$inputs/nonqualdivs_gcollapse_Radjgross_allyears_20201117.csv", clear
        save `nqd'

        use `info', clear
        merge 1:1 year wadjgross using `nqd', keepusing(has_nonqualdivs) keep(1 3) nogen
    }/*%>*/
   if ("`1'" == "div_info") {/*%<*/
        tempfile info
        forv yr = 2009/2016 {
            insheet using "$inputs/`yr'/divinfo_gcollapse_Radjgross_20201117.csv", clear
            gen year = `yr'
            if (`yr' == 2009) {
                save `info'
            }
            else {
                append using `info'
                save `info', replace
            }
        }

        tempfile nqd
        insheet using "$inputs/nonqualdivs_gcollapse_Radjgross_allyears_20201117.csv", clear
        save `nqd'

        use `info', clear
        merge 1:1 year wadjgross using `nqd', keepusing(has_nonqualdivs) keep(1 3) nogen
    }/*%>*/
    if ("`1'" == "soca") {/*%<*/
        import delimited using "$inputs/asset_comp_by_year.csv", clear

        egen hard_assets = rowtotal(net_depbiz_pers_prop net_depbiz_real_prop ///
            net_farmland net_livestock net_other_land net_resrent_prop ///
            net_timber net_all_res)

        egen fin_assets = rowtotal(net_fut_con net_gov_ob net_other_bonds ///
            net_put_call net_te_bond_mut_funds)

        egen stocks = rowtotal(net_corps net_mut_funds)

    }/*%>*/
   if ("`1'" == "pthru_returns") {/*%<*/
        insheet using "$inputs/returns_passthrough.csv", clear

        keep year group groupvar pthru_val_hybnof pthru_returns_hybnof
        rename *_hybnof *
        replace pthru_val = 1e-5 * pthru_val
    }/*%>*/
    if ("`1'" == "scfrevision") {/*%<*/
        use $inputs/scf_revision.dta, clear
    }/*%>*/
    if ("`1'" == "scfrevision_v3") {/*%<*/
        use $inputs/scf_revision.dta, clear

        * Pension and car debt
        rename networth_pref networth_supple
        gen networth_pref = networth + funded_pen_db - vehic - durables
        replace hwpen = hwpen - tot_pen_db + funded_pen_db
        replace hwoth = hwoth - veh_inst

    }/*%>*/
    if ("`1'" == "dfarevision") {/*%<*/
        use $inputs/dfa_revision.dta, clear

    }/*%>*/
    if ("`1'" == "sarin") {/*%<*/

        insheet using "$inputs/sarin_20200417.csv", comma clear

        rename (t01_ss total_ss) (top01_ssw_cms total_ssw_cms)

        replace top01_ssw_cms = top01_ssw_cms * 1E12
        replace total_ssw_cms = total_ssw_cms * 1E12

    }/*%>*/
    if ("`1'" == "sabelhaus") {/*%<*/

        import excel using $inputs/ssw_szz.xls, ///
            sheet("nwdbsort") cellrange(A1:C55) firstrow clear

        gen consolidate_group = cond(wealth_grpy < 6, 1, 2)
        collapse (sum) ssw_shv = exp_ssw, by(year consolidate_group)

        reshape wide ssw_shv, i(year) j(consolidate_group)

        gen total_ssw_shv = ssw_shv1 + ssw_shv2
        drop ssw_shv1
        rename ssw_shv2 top01_ssw_shv

    }/*%>*/
    if ("`1'" == "cstat_multiples") {/*%<*/
        setup_compustat_variables
        keep if sale > 0 & at > 0 
    }/*%>*/

    if ("`1'" == "telescope_parts_v3") {/*%<*/
  
        import delim $inputs/wealthparts_telescope.csv, clear

        assert sign(nonmort) == -1

        gen pthru_pref = s_value_avg_ebitda_red + p_value_avg_ebitda_red + solepropw + ///
                            missing_scorp + missing_pship
        gen pthru_base = solepropw_szz_sz20_scaled + scorw_szzhybnof_scaled + partw_szzhybnof_sz20_scaled
        gen hwbus_pref = (0.2 * ccorw_9010) + pthru_pref
		gen hwbus_base = (0.2 * ccorw_9010) + pthru_base
        gen hwequ_pref = 0.8 * ccorw_9010
        gen hwequ_base = hwequ_pref
        gen hwfix_pref = taxbond_info + muni + currency + taxbond_muf
        gen hwfix_base = hwfix_pref
        gen hwhou_pref = ownerhome_szz + rentalhome + ownermort + rentalmort
        gen hwhou_base = hwhou_pref
        gen hwoth_pref = nonmort + miscw_hweal$preferred_defn_late_v2
        gen hwoth_base = nonmort_ini + miscw_hweal$preferred_defn_late
        rename (penw_szz_scaled ccorw_9010) (hwpen_pref ccorw_pref)
        gen hwpen_base = penw_szz_scaled_v8
        gen ccorw_base = ccorw_pref

        * Forbes edits
        replace hwfix_pref = hwfix_pref + forbes400_hwfix
        replace hwequ_pref = hwequ_pref + forbes400_hwequ
        replace hwbus_pref = hwbus_pref + forbes400_hwbus
        replace hwhou_pref = hwhou_pref + forbes400_hwhou
        replace hwoth_pref = hwoth_pref + forbes400_hwoth

        replace pthru_pref = pthru_pref + forbes400_pthru 
        replace ccorw_pref = ccorw_pref + forbes400_ccorw

        gen pen_hou_oth_pref = hwhou_pref + hwoth_pref + hwpen_pref
        gen pen_hou_oth_base = hwhou_base + hwoth_base + hwpen_base
        
                /*******************************************************************
                    (2.2.2.2) Make equal returns portfolio delineation
                *******************************************************************/

        gen pthru_equrtrns = scorw_ini + partw_ini + solepropw_ini 
        gen pthru_equ = scorw + partw_sz20_scaled + solepropw_sz20_scaled
        gen hwbus_equrtrns = pthru_equrtrns + (0.2 * ccorw_ini)
        gen hwbus_equ = (0.2 * ccorw) + pthru_equ
        gen hwequ_equrtrns = 0.8 * ccorw_ini
        gen hwequ_equ = 0.8 * ccorw
        gen hwfix_equrtrns = taxbond_ini + muni_ini + currency_ini
        gen hwfix_equ = taxbond + muni + currency + taxbond_mufmisc_sz
        gen hwhou_equrtrns = rentalhome_ini + ownerhome_ini + ownermort_ini + rentalmort_ini
        gen hwhou_equ = ownerhome + rentalhome + ownermort + rentalmort
        rename (hwpen_ini nonmort_ini ccorw_ini) (hwpen_equrtrns hwoth_equrtrns ccorw_equrtrns)
        gen hwpen_equ = hwpen_equrtrns
        gen hwoth_equ = hwoth_equrtrns
        gen ccorw_equ = ccorw

        gen pen_hou_oth_equrtrns = hwhou_equrtrns + hwpen_equrtrns + hwoth_equrtrns
        gen pen_hou_oth_equ = hwhou_equ + hwpen_equ + hwoth_equ

                /*******************************************************************
                    (2.2.2.3) Finagle formatting to keep only own-ranked totals
                *******************************************************************/

        keep group rankscen *_equrtrns *_pref *_base *_equ

        reshape wide *_pref *_equrtrns *_base *_equ, i(group) j(rankscen)

        rename (group *_pref112 *_equrtrns20 *_base$preferred_defn_late *_equ$sz_wlth_defn_late_v3) ///
               (grp *_pref *_equrtrns *_base *_equ)

        collapse (sum) *_pref *_equrtrns *_base *_equ, by(grp)

    }/*%>*/
    if ("`1'" == "hetero_vals_v3") {/*%<*/
        * Produces alternative industry panel collapses.
        build_industry_valuations_v3

        * Add variables from SOI collapses
        tempfile SOI
        load_analysis_data soi_size_v3
        rename nc4 naics_4d
        * NAICS codes are an issue prior to 1998 in SOI
        keep if year > 1998 
        * Pre-2002 NAICS code fixes following Capitalists correlates program
        * Recodes naics_4d, so we need to collapse by naics_4d after
        fix_naics 
        drop if naics_4d == 5511
        collapse (sum) ebitd profits sale capital assets=tot_assets count, by(corp_form naics_4d year)
        keep if corp_form == "S"
        drop corp_form
        rename * *_S_soi
        rename (year_S_soi naics_4d_S_soi) (year naics_4d)
        save `SOI'

        tempfile top
        insheet using "panel_allpvtvalues.csv", comma clear
        save `top'

        tempfile indyS
        insheet using "scorp_nc4_valuations_20220112.csv", comma clear
        save `indyS'

        tempfile indyC
        insheet using "ccorp_nc4_valuations_20220112.csv", comma clear
        save `indyC'

        use `indyS', clear
        merge 1:1 year naics_4d using `indyC', nogen
        keep year naics_4d longname value_s* value_c* ebitd* ///
            eqmultiple_sale eqmultiple_assets eqmultiple_ebitd
        merge 1:1 year naics_4d using `top', nogen keepusing(value_all* profits_s)
        merge 1:1 year naics_4d using `SOI', nogen
    }/*%>*/
    if ("`1'" == "corelogic") {/*%<*/
        * Have to add state abbreviations to merge to DQ data
        tempfile names
        insheet using "$inputs/state_names.csv", clear names
        save `names'

        use "$inputs/corelogic_state_sa_20191022.dta", clear
        keep state mdate month cl_sa
        rename cl_sa price_index_cl
        gen year = yofd(dofm(mdate))
        collapse (mean) price_index_cl, by(state year)
        rename state state_name
        merge m:1 state_name using `names', keep(3) nogen
    }/*%>*/
    if ("`1'" == "dq_factors") {/*%<*/
        insheet using "$inputs/dq_state_collapse.csv", clear
    }/*%>*/
    if ("`1'" == "proptax") {/*%<*/
        use "$inputs/proptaxrev_tpc.dta", clear
    }/*%>*/
    if ("`1'" == "attom") {/*%<*/
        tempfile names
        insheet using "$inputs/state_names.csv", clear names
        save `names'

        * Grabbed from ATTOM summary from internet search: "pennsylvania
        * effective tax rate"
        * https://www.watchdog.org/pennsylvania/pennsylvania-s-property-tax-rate-th-highest-in-u-s/article_e045ebe4-5a1a-11e8-a306-6f773b8e2630.html
        insheet using "$inputs/attom_etr_2017.csv", clear
        rename state state_name
        replace state_name = strupper(state_name)
        merge m:1 state_name using `names', keep(3) nogen
        replace etr = .01 * etr
        rename etr etr_attom
        keep state etr_attom numberofhomes
    }/*%>*/
    if ("`1'" == "housing_factors") {/*%<*/
        * Generates housing factors over time.
        build_housing_factors
        insheet using "housing_factors_20200127.csv", comma clear
    }/*%>*/

    * Old stuff
    if ("`1'" == "soi_size") {/*%<*/
        /* 
            gen size_bucket = cond(mean_sale < 10e3, 1, 
                               cond(mean_sale < 100e3, 2, 
                                cond(mean_sale < 500e3, 3, 
                                 cond(mean_sale < 1e6, 4, 
                                  cond(mean_sale < 10e6, 5, 
                                   cond(mean_sale < 100e6, 6, 7))))));
        */
        insheet using "$inputs/corpform_size_nc4_collapse_20190502.csv", comma clear
    }/*%>*/
    if ("`1'" == "soi_size_v3") {/*%<*/
        /*
            gen profit_bucket = cond(profits < 50e3, 1, 2); // In 000s
        */
        insheet using "$inputs/corpform_profitsize_nc4_collapse_20220114.csv", comma clear
    }/*%>*/
    if ("`1'" == "top1s") {/*%<*/
        insheet using "$inputs/masked_s4d_decomp_top1_clean_extra.csv", comma clear
        keep nc4 sales profits ass nfirms nowners ntop1owners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename ntop1owners nt1owners
    }/*%>*/
    if ("`1'" == "top1p") {/*%<*/
        insheet using "$inputs/masked_p4d_decomp_top1_clean_extra.csv", comma clear
        keep nc4 sales profits ass nfirms nowners ntop1owners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename ntop1owners nt1owners
    }/*%>*/
    if ("`1'" == "top01s") {/*%<*/
        insheet using "$inputs/masked_s4d_decomp_top01_clean_extra.csv", comma clear
        keep nc4 sales profits ass nfirms nowners ntoppt1owners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename ntoppt1owners nt01owners
    }/*%>*/
    if ("`1'" == "top01p") {/*%<*/
        insheet using "$inputs/masked_p4d_decomp_top01_clean_extra.csv", comma clear
        keep nc4 sales profits ass nfirms nowners ntoppt1owners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename ntoppt1owners nt01owners
    }/*%>*/
    if ("`1'" == "alls") {/*%<*/
        insheet using "$inputs/masked_s4d_decomp_all_clean.csv", comma clear
        keep nc4 sales profits ass nfirms nowners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename nowners nallowners
    }/*%>*/
    if ("`1'" == "allp") {/*%<*/
        insheet using "$inputs/masked_p4d_decomp_all_clean.csv", comma clear
        keep nc4 sales profits ass nfirms nowners totordinc ebitda_full year
        rename nc4 naics_4d
        rename (sales ass) (sale assets)
        rename ebitda_full ebitd
        rename nowners nallowners
    }/*%>*/
    if ("`1'" == "t1sp") {/*%<*/
        tempfile x
        load_analysis_data top1p
        rename * *_p
        rename (naics_4d_p year_p) (naics_4d year)
        save `x'
        load_analysis_data top1s
        rename * *_s
        rename (naics_4d_s year_s) (naics_4d year)
        merge 1:1 naics_4d year using `x', keep(1 2 3) nogen
    }/*%>*/
    if ("`1'" == "t01sp") {/*%<*/
        tempfile x
        load_analysis_data top01p
        rename * *_p
        rename (naics_4d_p year_p) (naics_4d year)
        save `x'
        load_analysis_data top01s
        rename * *_s
        rename (naics_4d_s year_s) (naics_4d year)
        merge 1:1 naics_4d year using `x', keep(1 2 3) nogen
    }/*%>*/
    if ("`1'" == "allsp") {/*%<*/
        tempfile x
        load_analysis_data allp
        rename * *_p
        rename (naics_4d_p year_p) (naics_4d year)
        save `x'
        load_analysis_data alls
        rename * *_s
        rename (naics_4d_s year_s) (naics_4d year)
        merge 1:1 naics_4d year using `x', keep(1 2 3) nogen
    }/*%>*/
    if ("`1'" == "pubpvt_c") {/*%<*/

        tempfile profits
        insheet using "$inputs/profitsdivs_pubvpri_ccorps_20170825.csv", clear
        drop if _n >= 49 
        drop v8-v12
        keep if year > 1993
        destring public_co-count, replace
        save `profits'

        tempfile inv
        insheet using "$inputs/investment_pubvpri_ccorps_20170825.csv", clear
        keep year public_co investment liquid_assets
        save `inv'
        
        use `profits', clear
        merge 1:1 year public_co using `inv', keep(1 3) nogen
        format %15.3g dividends-liquid_assets
    }/*%>*/

end/*%>*/

capture program drop usd2016/*%<*/
program usd2016
    syntax, var(varname) yr(varname)

*https://fred.stlouisfed.org/series/CPIAUCSL#0
*annual end of period value divided by 2016 value

    replace `var'= `var'*2.810428241  if `yr'==1980 & `var'!=.
    replace `var'= `var'*2.580456961  if `yr'==1981 & `var'!=.
	replace `var'= `var'*2.485373593  if `yr'==1982 & `var'!=.
    replace `var'= `var'*2.394684418  if `yr'==1983 & `var'!=.
	replace `var'= `var'*2.301620853  if `yr'==1984 & `var'!=.
    replace `var'= `var'*2.217543379  if `yr'==1985 & `var'!=.
	replace `var'= `var'*2.191525271  if `yr'==1986 & `var'!=.
    replace `var'= `var'*2.100527682  if `yr'==1987 & `var'!=.
	replace `var'= `var'*2.011772991  if `yr'==1988 & `var'!=.
    replace `var'= `var'*1.922573238  if `yr'==1989 & `var'!=.
	replace `var'= `var'*1.809396423  if `yr'==1990 & `var'!=.		
    replace `var'= `var'*1.757026049  if `yr'==1991 & `var'!=.
    replace `var'= `var'*1.706401968  if `yr'==1992 & `var'!=.	
    replace `var'= `var'*1.659747095  if `yr'==1993 & `var'!=.
    replace `var'= `var'*1.617728181  if `yr'==1994 & `var'!=.
    replace `var'= `var'*1.577784276  if `yr'==1995 & `var'!=.
    replace `var'= `var'*1.526216216  if `yr'==1996 & `var'!=.
    replace `var'= `var'*1.500747837  if `yr'==1997 & `var'!=.
    replace `var'= `var'*1.477013382  if `yr'==1998 & `var'!=.
    replace `var'= `var'*1.438513033  if `yr'==1999 & `var'!=.
    replace `var'= `var'*1.390727377  if `yr'==2000 & `var'!=.
    replace `var'= `var'*1.368776776  if `yr'==2001 & `var'!=.
    replace `var'= `var'*1.335649065  if `yr'==2002 & `var'!=.
    replace `var'= `var'*1.309008086  if `yr'==2003 & `var'!=.
    replace `var'= `var'*1.266671883  if `yr'==2004 & `var'!=.
    replace `var'= `var'*1.225749621  if `yr'==2005 & `var'!=.
    replace `var'= `var'*1.195573609  if `yr'==2006 & `var'!=.
    replace `var'= `var'*1.14838847   if `yr'==2007 & `var'!=.
    replace `var'= `var'*1.14864379   if `yr'==2008 & `var'!=.
    replace `var'= `var'*1.117204286  if `yr'==2009 & `var'!=.
    replace `var'= `var'*1.101368881  if `yr'==2010 & `var'!=.
    replace `var'= `var'*1.06864622   if `yr'==2011 & `var'!=.
    replace `var'= `var'*1.050168454  if `yr'==2012 & `var'!=.
    replace `var'= `var'*1.034500241  if `yr'==2013 & `var'!=.
    replace `var'= `var'*1.027639765  if `yr'==2014 & `var'!=.
    replace `var'= `var'*1.020916896  if `yr'==2015 & `var'!=.
    replace `var'= `var'*1            if `yr'==2016 & `var'!=.	

end/*%>*/

capture program drop fix_naics/*%<*/
program define fix_naics
    * From rents.xpez-20161125.do 

    /* 
    insheet using "naics_4d_correlates.csv", clear
    gsort - profits_scorp_real_99_13

    Top 10:
    Restaurants 7225
    Building equipment cntrctr  2382
    Residential building constr 2361
    Other specialty trade cntrctr   2389
    Machinery/supply merch whlsl    4238

    Nonresidential building constr  2362
    Building finishing cntrctr  2383
    Building foundation/exterior cntrctr    2381
    Misc. durable goods merch whlsl 4239
    Grocery/related product whlsl   4244

    Tom Cui email dated 03/30/2017: 

    Here’s how you can use it to spot check your data. If you see a NAICS
    4-digit code that disappears over one NAICS revision, go to the syzzle repo,
    code/industry_level_correlates/naics_crosswalk/orig, and select the
    spreadsheet that shows changes to the revision over which the code
    disappeared.

    For example, with NAICS 7225 you’ll see the code only exists in 2012, and
    by checking the crosswalk file “2012_to_2007_NAICS” you’ll see the NAICS
    code is NAICS 7221 and 7222 merged together and renamed.
    */
    capture rename nc4 naics_4d

    /* 7225
        * introduced in 2012 NAICS revision.
        * was 7221 and 7222 in 2007 revision. Same in 2002
    */
    replace naics_4d = 7225 if inlist(naics_4d, 7221, 7222)

    /* 238*, 2373, 2379
        * In Naics since 2002 revision
        * Had codes from 2351-2359 in 1997 revision
        * See proposal in photo pic, attempt to aggregate
          into 3 groups
    */
    #delimit ;
    replace naics_4d = 2381 if inlist(naics_4d, 2352, 2354, 2355, 2356, 2357,
                                        2358, 2383);
    replace naics_4d = 2382 if inlist(naics_4d, 2351, 2353);
    replace naics_4d = 2389 if inlist(naics_4d, 2359);
    #delimit cr

    /* 2361 and 2362 
        * In Naics since 2002
        * Had codes from 2332-2333, 2349 in 1997 version
    */
    replace naics_4d = 2361 if naics_4d == 2332
    replace naics_4d = 2362 if naics_4d == 2333
    replace naics_4d = 2372 if naics_4d == 2331
    replace naics_4d = 2373 if naics_4d == 2341
    replace naics_4d = 2379 if inlist(naics_4d, 2349, 2371)

    /* 423*, 424*
        * In naics since 2002 
        * Had codes like 421* and 422* in 1997 version
        * 2002 revision broke out exchanges and dealers from traditional
          wholesale operations, now naics 4251 (still pretty small)
    */
    replace naics_4d = naics_4d + 20 if inrange(naics_4d, 4211, 4219)
    replace naics_4d = naics_4d + 20 if inrange(naics_4d, 4221, 4229)

    /* Added 2020-02-19, EZ */
    /* 4521 & 4529 rearranged into 4522 and 4523 in 2017 revision */
    replace naics_4d = 4523 if naics_4d == 4521 | naics_4d == 4529 | naics_4d == 4522   

    /* 5182, 5151, 5152, 517*
        * in naics since 2002
        * internet and telecom caused a bunch of new industries
    */
    replace naics_4d = 5191 if inlist(naics_4d, 5141, 5161, 5181)
    replace naics_4d = 5182 if naics_4d == 5142 
    replace naics_4d = 5151 if naics_4d == 5131
    replace naics_4d = 5175 if naics_4d == 5132 | naics_4d == 5152
    replace naics_4d = 5179 if inlist(naics_4d, 5133, 5170, 5171, 5172, 5173, 5174)

    /* 5161 internet broken out from non-internet
        * In naics since 2002
    */
    replace naics_4d = 5111 if naics_4d == 5161

    /* Additional fixes as of 2019-06-22 */

    /* 4860 is not a NAICS code 
       In the SOI Corporation Sourcebook: 486000|Pipeline Transportation
       So coding it to 4869, which is Other Pipeline Transportation
    */
    replace naics_4d = 4869 if naics_4d == 4860

    gen nc4 = naics_4d

end/*%>*/

capture program drop build_industry_valuations_v3/*%<*/
program define build_industry_valuations_v3

    ****************************************************************************
    * 1. DATA SETUP
    ****************************************************************************
    * Names %<
    ****************************************************************************
    tempfile names
    insheet using $inputs/naics_4d_correlates.csv, clear
    keep naics_4d longname
    save `names'/*%>*/
    ****************************************************************************

    ****************************************************************************
    * SOI data%<
    ****************************************************************************
    tempfile Msoi
    load_analysis_data soi_size_v3
    rename nc4 naics_4d
    save `Msoi'/*%>*/
    ****************************************************************************

    ****************************************************************************
    * Pvt share of C%<
    ****************************************************************************
    tempfile pvtc
    load_analysis_data pubpvt_c

    bys year: egen total_dividends = total(dividends)
    bys year: egen total_capital = total(capital)
    bys year: egen total_sale = total(sale)
    bys year: egen total_profits = total(profits)

    gen dividends_share = dividends/total_dividends
    gen capital_share = capital/total_capital
    gen sale_share = sale/total_sale
    gen profits_share = profits/total_profits

    * Collapse missing assets and ebitd
    gen assets_share = capital_share
    gen ebitd_share = profits_share 

    keep if public_co == 1
    collapse (mean) dividends_share-ebitd_share
    gen corp_form = "C"
    save `pvtc'/*%>*/
    ****************************************************************************

    ****************************************************************************
    * Compustat valuation data.%<
    ****************************************************************************
    *make_cstat_20190610
    use "$inputs/compustat_profits_20190610.dta", clear

    keep fyear conm gvkey standard_naics Q_bsw mktval_equity sale at ppent profits ebit ebitd

    gen naics_4d = floor(standard_naics/100)
    fix_naics

    #delimit ;
    /*
    gen size_bucketH = cond(sale < 10, 5,
                       cond(sale < 100, 6, 
                        cond(sale < 1000, 7, 
                         cond(sale < 10000, 8, 9))));
    */
    gen size_bucketH = cond(sale < 10, 5,
                        cond(sale < 100, 6, 7));
    #delimit cr
    gen late = fyear > 2000 
    bys gvkey late: egen size_bucket = max(size_bucketH)

    gen mktval = Q_bsw * at
    keep if mktval > 0

    gen multiple_assets = Q_bsw
    gen multiple_capital = Q_bsw * at / ppent
    gen multiple_sale = Q_bsw * at / sale
    gen multiple_ebitd = Q_bsw * at / ebitd

    gen eqmultiple_assets = mktval_equity / at
    gen eqmultiple_capital = mktval_equity / ppent
    gen eqmultiple_sale = mktval_equity / sale
    gen eqmultiple_ebitd = mktval_equity / ebitd

    foreach v in "assets" "capital" "sale" "ebitd" {
        trim_tails multiple_`v', level(.01)
        trim_tails eqmultiple_`v', level(.01)
    }
    drop *multiple*_ut
    drop *multiple*_w
    tempfile cstat
    save `cstat'
/*%>*/
    ****************************************************************************

    ****************************************************************************
    * Four digit collapses from OZ for S-corps and pships%<
    ****************************************************************************
    tempfile t1sp t01sp allsp
    load_analysis_data t1sp
    fix_naics
    collapse (sum) *_p *_s, by(naics_4d year)
    save `t1sp'
    load_analysis_data t01sp
    fix_naics
    collapse (sum) *_p *_s, by(naics_4d year)
    save `t01sp'
    load_analysis_data allsp
    fix_naics
    collapse (sum) *_p *_s, by(naics_4d year)
    save `allsp'/*%>*/
    ****************************************************************************

    ****************************************************************************
    * Make multiples%<
    ****************************************************************************
    tempfile Msize Mindy Msizeindy Myearly Mfinsplit

    * Multiples by firm size within Compustat
    use `cstat', clear
    gen here = 1
    keep if fyear == 2014
    collapse (sum) mktval* at ppent sale ebitd count=here, by(size_bucket)
    gen eqmultiple_assets = mktval_equity/at
    gen eqmultiple_capital = mktval_equity/ppent
    gen eqmultiple_sale = mktval_equity/sale
    gen eqmultiple_ebitd = mktval_equity/ebitd
    save `Msize'

    * Multiples by 2-digit and 3-digit industry (dollar weighted by value of
    *   equity)
    use `cstat', clear
    rename fyear year
    bys naics_4d year: egen count = count(gvkey)
    collapse (sum) mktval* at ppent profits sale ebitd, by(naics_4d year count)
    gen eqmultiple_assets_nc4 = mktval_equity/at
    gen eqmultiple_capital_nc4 = mktval_equity/ppent
    gen eqmultiple_sale_nc4 = mktval_equity/sale
    gen eqmultiple_ebitd_nc4 = mktval_equity/ebitd
    gen eqmultiple_profits_nc4 = mktval_equity/profits
    gen fin = naics_4d > 5200 & naics_4d < 5300
    drop if fin == 1
    drop if count < 5
    impose_bounds eqmultiple_assets_nc4, lower(0) upper(5)
    impose_bounds eqmultiple_sale_nc4, lower(0) upper(5)
    impose_bounds eqmultiple_capital_nc4, lower(0) upper(20)
    impose_bounds eqmultiple_ebitd_nc4, lower(0) upper(40)
    impose_bounds eqmultiple_profits_nc4, lower(0) upper(50)
    keep year eqmultiple* naics_4d
    save `Mindy'

    use `cstat', clear
    collapse (mean) multiple* [aw=mktval], by(fyear)
    save `Myearly'

    use `cstat', clear
    gen here = 1
    collapse (sum) mktval at ppent sale ebitd count=here, by(fyear size_bucket naics_4d)
    gen multiple_assets = mktval/at
    gen multiple_capital = mktval/ppent
    gen multiple_sale = mktval/sale
    gen multiple_ebitd = mktval/ebitd
    foreach v in "assets" "capital" "sale" "ebitd" {
        trim_tails multiple_`v', level(.01)
    }
    save `Msizeindy'

    use `cstat', clear
    gen fin = nc4 > 5200 & nc4 < 5300
    tabstat sale-profits mktval*, by(fin) s(sum) 
    ****************************************************************************
    * Outliers in early 2000s
    * Some telecom companies lost > 50B, including ATT losing 100B
    * For computing aggregate multiples, drop observations if profits less than 
    * 0.1 percentile, which is around -2B
    cumul profits if fin != 1, gen(profit_rank)
    drop if profit_rank < .001
    ****************************************************************************
    collapse (sum) at ebitd profits sale ppent mktval*, by(fin fyear)
    rename fyear year
    gen eqmultiple_assets_agg = mktval_equity/at
    gen eqmultiple_capital_agg = mktval_equity/ppent
    gen eqmultiple_sale_agg = mktval_equity/sale
    gen eqmultiple_ebitd_agg = mktval_equity/ebitd
    gen eqmultiple_profits_agg = mktval_equity/profits
    drop if fin == 1
    impose_bounds eqmultiple_assets_agg, lower(0) upper(5)
    impose_bounds eqmultiple_sale_agg, lower(0) upper(5)
    impose_bounds eqmultiple_capital_agg, lower(0) upper(20)
    impose_bounds eqmultiple_ebitd_agg, lower(0) upper(40)
    impose_bounds eqmultiple_profits_agg, lower(0) upper(50)
    keep year eqmultiple*
    outsheet year eqmultiple_*_agg using "cstat_agg_valuations_20210713.csv", comma replace 
    save `Mfinsplit'/*%>*/
    ****************************************************************************

    ****************************************************************************
    * 2. DATA BUILD
    ****************************************************************************
    use `Msoi', clear
    * NAICS codes are an issue prior to 1998 in SOI
    keep if year > 1998 
    * Pre-2002 NAICS code fixes following Capitalists correlates program
    * Recodes naics_4d, so we need to collapse by naics_4d after
    fix_naics 
    drop if naics_4d == 5511
    collapse (sum) ebitd profits sale capital assets=tot_assets count, ///
        by(corp_form naics_4d year profit_bucket)
    merge m:1 year using `Mfinsplit', keep(3) nogen
    merge m:1 year naics_4d using `Mindy', keep(1 3) nogen
    merge m:1 corp_form using `pvtc', keep(1 3) nogen
    merge m:1 naics_4d using `names', keep(1 3) nogen

    ****************************************************************************
    * Make valuations using industry multiples
    ****************************************************************************

    * Added 2020-01-09: Capitalist valuation
    gen ebitd_syzz = cond(corp_form == "S", ebitd - .75 * profits, ebitd)
    gen ebitd_syzz_hyb = cond(corp_form == "S" & profit_bucket == 1, ebitd - .75 * profits, ebitd)
    gen eqmultiple_ebitd_syzz_agg = eqmultiple_ebitd_agg
    gen ebitd_syzz_share = ebitd_share
    gen ebitd_syzz_hyb_share = ebitd_share

    foreach x of varlist profits sale capital ebitd assets {
        gen eqmultiple_`x' = cond(eqmultiple_`x'_nc4 != ., eqmultiple_`x'_nc4, eqmultiple_`x'_agg)
    }
    gen eqmultiple_ebitd_syzz = eqmultiple_ebitd
    gen eqmultiple_ebitd_syzz_hyb = eqmultiple_ebitd

    * Simple valuations
    foreach x of varlist profits sale capital ebitd ebitd_syzz ebitd_syzz_hyb assets {
        gen value_S_`x' = 1e3 * eqmultiple_`x' * `x' if corp_form == "S"
        gen value_C_`x' = 1e3 * eqmultiple_`x' * `x' * (1 - `x'_share) if corp_form == "C"
        
        impose_bounds value_S_`x', lower(0) upper(1e12)
        impose_bounds value_C_`x', lower(0) upper(1e12)
    }

    * Collapse profit buckets after applying hybrid valuation
    collapse (sum) value_S_* value_C_* ///
        profits sale capital ebitd ebitd_syzz ebitd_syzz_hyb assets ///
        count, by(longname corp_form naics_4d year eqmultiple_*)

    foreach x of varlist profits sale capital ebitd ebitd_syzz ebitd_syzz_hyb assets {
        bys year naics_4d: egen Sh = max(value_S_`x')
        bys year naics_4d: egen Ch = max(value_C_`x')
        gen value_`x' = Sh + Ch
        drop Sh Ch

        replace value_S_`x' = 1e-9 * value_S_`x'
        replace value_C_`x' = 1e-9 * value_C_`x'
        replace value_`x' = 1e-9 * value_`x'
    }
    egen value_S_modelaverage = rowmean(value_S_sale value_S_capital value_S_ebitd_syzz)
    egen value_C_modelaverage = rowmean(value_C_sale value_C_capital value_C_ebitd_syzz)
    egen value_modelaverage = rowmean(value_sale value_capital value_ebitd_syzz)

    egen value_S_modelaverage_hyb = rowmean(value_S_sale value_S_assets value_S_ebitd_syzz_hyb)
    egen value_C_modelaverage_hyb = rowmean(value_C_sale value_C_assets value_C_ebitd_syzz_hyb)
    egen value_modelaverage_hyb = rowmean(value_sale value_assets value_ebitd_syzz_hyb)

    * Choose preferred spec
    gen value_S_preferred = value_S_modelaverage
    gen value_C_preferred = value_C_modelaverage
    gen value_preferred = value_modelaverage

    gen value_S_preferred_v3 = value_S_modelaverage_hyb
    gen value_C_preferred_v3 = value_C_modelaverage_hyb
    gen value_preferred_v3 = value_modelaverage_hyb
    
    * Convert to Millions
    gen value_S_perfirm_avg = 1e3 * value_S_preferred / count if corp_form == "S"
    gen value_S_perfirm = 1e3 * value_S_preferred / count if corp_form == "S"

    * list sale capital ebitd eqmultiple_sale eqmultiple_capital eqmultiple_ebitd value_S_modelaverage if naics_4d == "4411" & year == 2014
    * sale    capital      ebitd   eqmult~e   eqmult~l   eqmult~d   value_..
    * 5.76e+08   1.33e+07   1.23e+07   .4028071   3.540416   8.736609   128.8815 

    * Outsheet for inside use
    #delimit ;
    outsheet
        year naics_4d count longname
        eqmultiple_*_agg eqmultiple_*_nc4 
        eqmultiple_assets
        eqmultiple_profits eqmultiple_sale eqmultiple_capital eqmultiple_ebitd*
        value_S_profits value_S_sale value_S_capital value_S_ebitd
        value_S_assets value_S_ebitd_syzz*
        value_S_modelaverage* value_S_preferred*
        ebitd ebitd_syzz ebitd_syzz_hyb
        if corp_form == "S" 
        /* using "scorp_nc4_valuations_20190615.csv", comma replace; */
        /* using "scorp_nc4_valuations_20191218.csv", comma replace; */
        /*using "scorp_nc4_valuations_20200109.csv", comma replace; */
        using "scorp_nc4_valuations_20220112.csv", comma replace; 
    outsheet
        year naics_4d count longname
        eqmultiple_*_agg eqmultiple_*_nc4 
        eqmultiple_assets
        eqmultiple_profits eqmultiple_sale eqmultiple_capital eqmultiple_ebitd*
        value_C_profits value_C_sale value_C_capital value_C_ebitd
        value_C_assets value_C_ebitd_syzz*
        value_C_modelaverage* value_C_preferred*
        if corp_form == "C" 
        /* using "ccorp_nc4_valuations_20190615.csv", comma replace; */
        /* using "ccorp_nc4_valuations_20191218.csv", comma replace; */
        /* using "ccorp_nc4_valuations_20200109.csv", comma replace; */
        using "ccorp_nc4_valuations_20220112.csv", comma replace; 
    #delimit cr

    ****************************************************************************
    * Make valuations for different groups of owners.
    ****************************************************************************
    tempfile sval
    *insheet using "scorp_nc4_valuations_20190615.csv", clear
    *insheet using "scorp_nc4_valuations_20191218.csv", clear
    *insheet using "scorp_nc4_valuations_20200109.csv", clear
    insheet using "scorp_nc4_valuations_20220112.csv", clear
    
    save `sval'

    foreach x in "all" "t1" "t01" {
        use ``x'sp', clear
        merge 1:1 year naics_4d using `sval', keep(3) nogen

        * Added 2020-01-09: Capitalist valuation inputs
        gen ebitd_syzz_s = ebitd_s - .75 * profits_s
        gen ebitd_syzz_p = ebitd_p - .75 * profits_p
        gen profits_syzz_s = profits_s - .75 * profits_s
        gen profits_syzz_p = profits_p - .75 * profits_p
        gen eqmultiple_profits_syzz = eqmultiple_profits

        * Make alternative specs
        foreach v in "profits" "sale" "assets" "ebitd" "ebitd_syzz" /*"ebitd_syzz_hyb"*/ {
            gen value_`x'S_`v' = eqmultiple_`v' * `v'_s
            gen value_`x'P_`v' = eqmultiple_`v' * `v'_p
            
            impose_bounds value_`x'S_`v', lower(0) upper(1e12)
            impose_bounds value_`x'P_`v', lower(0) upper(1e12)

            bys year naics_4d: egen Sh = max(value_`x'S_`v')
            bys year naics_4d: egen Ph = max(value_`x'P_`v')
            gen value_`x'_`v' = Sh + Ph
            drop Sh Ph

            replace value_`x'S_`v' = 1e-9 * value_`x'S_`v'
            replace value_`x'P_`v' = 1e-9 * value_`x'P_`v'
            replace value_`x'_`v' = 1e-9 * value_`x'_`v'
        }
        egen value_`x'S_modelaverage = rowmean(value_`x'S_sale value_`x'S_assets value_`x'S_ebitd_syzz)
        egen value_`x'P_modelaverage = rowmean(value_`x'P_sale value_`x'P_assets value_`x'P_ebitd_syzz)
        egen value_`x'_modelaverage =  rowmean(value_`x'_sale  value_`x'_assets  value_`x'_ebitd_syzz)

        /*
        egen value_`x'S_modelaverage_hyb = rowmean(value_`x'S_sale value_`x'S_assets value_`x'S_ebitd_syzz_hyb)
        egen value_`x'P_modelaverage_hyb = rowmean(value_`x'P_sale value_`x'P_assets value_`x'P_ebitd_syzz_hyb)
        egen value_`x'_modelaverage_hyb =  rowmean(value_`x'_sale value_`x'_assets  value_`x'_ebitd_syzz_hyb)
        */

        * Choose preferred spec
        gen value_`x'S_preferred = value_`x'S_modelaverage
        gen value_`x'P_preferred = value_`x'P_modelaverage
        gen value_`x'_preferred = value_`x'_modelaverage

        /*
        * Need the ownerfirm collapses with hybrid at person level, so skipping
        * for this appendix table.
        gen value_`x'S_preferred_v3 = value_`x'S_modelaverage_hyb
        gen value_`x'P_preferred_v3 = value_`x'P_modelaverage_hyb
        gen value_`x'_preferred_v3 = value_`x'_modelaverage_hyb
        */

        gen value_`x'S_preferred_v3 = value_`x'S_modelaverage
        gen value_`x'P_preferred_v3 = value_`x'P_modelaverage
        gen value_`x'_preferred_v3 = value_`x'_modelaverage

        * Convert to Millions
        gen value_`x'S_perfirm = 1e3 * value_`x'S_preferred_v3 / nfirms_s 
        gen value_`x'P_perfirm = 1e3 * value_`x'P_preferred_v3 / nfirms_p
        gen value_`x'_perfirm = 1e3 * value_`x'_preferred_v3 / (nfirms_p + nfirms_s)

        gen value_`x'S_perowner = 1e3 * value_`x'S_preferred_v3 / n`x'owners_s 
        gen value_`x'P_perowner = 1e3 * value_`x'P_preferred_v3 / n`x'owners_p
        gen value_`x'_perowner = 1e3 * value_`x'_preferred_v3 / (n`x'owners_p + n`x'owners_s)

        gen return_on_equity_`x'S = 100 * 1e-9 * .25 * profits_syzz_s/value_`x'S_preferred_v3 
        gen return_on_equity_`x'P = 100 * 1e-9 * .25 * profits_syzz_p/value_`x'P_preferred_v3 
        gen return_on_equity_`x' = 100 * 1e-9 * .25 * (profits_syzz_p + profits_syzz_s)/value_`x'_preferred_v3 
        foreach v of varlist return_on_equity_`x'* {
            impose_bounds `v', upper(100) lower(-100)
        }

        * Compute aggregate return on equity
        bys year: egen total_value_`x' = total(value_`x'_preferred_v3)
        bys year: egen total_profits_`x'S = total(profits_s)
        bys year: egen total_profits_`x'P = total(profits_p)
        gen total_profits_`x' = total_profits_`x'S + total_profits_`x'P
        gen return_on_equity_agg_`x' = 100 * 1e-9 * .25 * total_profits_`x'/total_value_`x'

        drop *_ub
        outsheet using panel_`x'pvtvalues.csv, comma replace
    }

end/*%>*/

*******************************************************************************
* Build micro data sets
*******************************************************************************
capture program drop query_factors_Z/*%<*/
program define query_factors_Z

    * Query from Z server
    * Assessor file based on assessments made in 2012 or 2013
    local q = "SELECT sum(SA_TAX_VAL)/1000000 taxval," 
    local q = "`q'" + "sum(SA_VAL_ASSD)/1000000 totalval,"
    local q = "`q'" + "sum(SA_VAL_MARKET)/1000000 totalval2,"
    local q = "`q'" + "count(*) count, SA_SITE_STATE state "
    local q = "`q'" + "FROM assess GROUP BY SA_SITE_STATE"

    set odbcmgr unixodbc
    odbc load, exec("`q'") dsn("dataquick") clear
    outsheet using $inputs/dq_state_collapse_20190202.csv, replace

end/*%>*/

capture program drop build_housing_factors/*%<*/
program define build_housing_factors

    tempfile dq prop attom

    load_analysis_data attom
    save `attom'

    * 1. Build factors for cross section.
    *query_factors_Z
    load_analysis_data dq_factors

    gen factor_1 = totalval/taxval
    gen factor_2 = totalval2/taxval
    gen factor = cond(factor_2 > factor_1 & factor_2 != ., factor_2, factor_1)
    gen agg_valH = cond(factor_2 > factor_1 & factor_2 != ., totalval2, totalval)

    * Version 2.0: fix other property tax rates using ATTOM collapse from most recent
    * year...possible candidates include IL, PA, DE, VT. In Downloads right now.
    merge 1:1 state using `attom', keep(3) nogen
    gen etr_cl = 1/factor
    gen etr_diff = abs(etr_cl - etr_attom)
    gen factor_cl = factor
    gen factor_attom = 1/etr_attom
    replace factor = factor_attom if etr_diff > .001
    replace agg_valH = (factor_attom / factor_cl) * agg_valH if etr_diff > .001

    egen agg_val = total(agg_valH)
    egen agg_tax = total(taxval)
    gen factor_agg = agg_val/agg_tax
    keep state factor* taxval 
    rename taxval proptax_cl
    save `dq'

    load_analysis_data proptax
    rename stateabbrev state
    rename proptaxrev_sl proptax_census
    keep statename state statefips year proptax_census proptaxrev pop
    * Recode to millions to match Corelogic
    replace proptax_census = 1e-3 * proptax_census
    merge m:1 state using `dq', keep(3) nogen
    save `prop'

    * 2. Use house price indexes to create time series of factors.
    * Load state price indexes from CoreLogic.
    tempfile factors_ts
    load_analysis_data corelogic
    drop if year == 1976
    * Move to 2012 dollars
    gen prcH = (year == 2012) * price_index_cl
    bys state: egen price_index_2012 = max(prcH)
    replace price_index_cl = price_index_cl / price_index_2012
    drop prcH price_index_2012
    merge 1:1 state year using `prop', keep(1 3) nogen

    sort state year
    foreach v of varlist pop proptax_census {
        by state: assert `v'[_n-1] != . if year == 2016
    }

    * Estimate real estate wealth in other years
    gen realH = (year == 2012) * (factor * proptax_cl)
    bys state: egen real_2012 = max(realH)
    gen realestate_val = price_index_cl * real_2012
    gen popH = (year == 2012) * pop
    bys state: egen pop_2012 = max(popH)
    gen pop_scale = pop/pop_2012
    drop realH real_2012 popH
    replace realestate_val = realestate_val * pop_scale

    * Cap factors in other years

    * Generate a scale variable since Census real estate tax revenues are on too
    * broad a base (includes commercial real estate)
    gen proptax_clH = (year == 2012) * proptax_cl
    egen agg_proptax_cl = total(proptax_clH)
    bys year: egen agg_proptax_census = total(proptax_census)
    gen proptax_scaleH = (year == 2012) * agg_proptax_cl / agg_proptax_census
    egen proptax_scale = max(proptax_scaleH)
    gen proptax_census_scale = proptax_census * proptax_scale

    * Generate factors
    gen factor_housing = realestate_val / (proptax_census_scale)
    sort state year
    replace factor_housing = (factor_housing[_n-1] + factor_housing[_n+1])/2 ///
        if year == 2001 | year == 2003

    keep year-proptax_cl factor_agg proptax_census_scale factor realestate_val pop_scale factor_housing 

    bys year: egen agg_reval = total(realestate_val) 
    bys year: egen agg_proptax = total(proptax_census_scale) 
    gen agg_factor_housing = agg_reval/agg_proptax
    save `factors_ts'

    outsheet using "housing_factors_20200127.csv", replace comma

end/*%>*/

*******************************************************************************
* Complex data manipulations
*******************************************************************************
capture program drop load_taxdata/*%<*/
program define load_taxdata
	syntax, rankspec(int) [startyr(string) endyr(string) year(string) units(string) top0001(string)]

	/***************************************************************************
		(0) Parse program inputs
	***************************************************************************/

	if "`year'" != "" & real("`year'") != . {
		local startyr = `year'
		local endyr = `year'
	}

	if "`startyr'" == "" {
		di "No startyr specified; assuming start year = 1966"
		local startyr = 1966
	}
	if "`endyr'" == "" {
		di "No endyr specified; assuming end year = 2016"
		local endyr = 2016
	}

	if "`units'" != "" & !inlist("`units'", "tu", "es") {
		di as error "Invalid split level; must be es or tu"
		exit
	}
	else if "`units'" == "" {
		di "No units specified; assuming individuals (equal split)"
		local units = "es"
	}

	/***************************************************************************
		(1) Load in data from start year to end year, constructing baseline 
			variables.
	***************************************************************************/

	forv year = `startyr' / `endyr' {

		/***********************************************************************
			(1.1) Read in files
		***********************************************************************/

      
        import delim using $inputs/20220127/wealth_scen`rankspec'_`units'_`year'.csv, clear
      

	
		/***********************************************************************
			(1.2) Construct preferred wealth portfolio allocations
		***********************************************************************/

        /* From wealthbuild-20220114.do
        spec 3016:
                taxbond_muf taxbond_info muni currency  
                ownerhome_szz rentalhome ownermort rentalmort nonmort_ini
                ccorw_9010  
                penw_szz_scaled_v8
                scorw_szzhybnof_scaled partw_szzhybnof_sz20_scaled solepropw_szz_sz20_scaled
                miscw
        spec 3115:
                taxbond_muf taxbond_cmd_3tier muni currency  
                ownerhome_szz rentalhome ownermort rentalmort nonmort_ini
                ccorw_9010  
                penw_szz_scaled_v8
                scorw_szz_scaled partw_szz_sz20_scaled solepropw_szz_sz20_scaled
                miscw
        spec 3215:
                taxbond_muf taxbond_cmd_3tier muni currency  
                ownerhome_ini rentalhome ownermort_ini rentalmort nonmort_ini
                ccorw_9010  
                szz_penw_pre1980_v8
                scorw_szz_scaled partw_szz_sz20_scaled solepropw_szz_sz20_scaled
                miscw
        spec 3017: (our preferred spec without DB)
                taxbond_muf taxbond_info muni currency  
                ownerhome_szz rentalhome ownermort rentalmort nonmort
                ccorw_9010  
                penw_szz_scaled_v8
                s_value_avg_ebitda_hybnof_red p_value_avg_ebitda_hybnof_red solepropw
                missing_scorp missing_pship 
                miscw
        spec 3007: 
                taxbond taxbond_mufmisc_SZ muni currency
                ownerhome rentalhome ownermort rentalmort nonmort_ini
                ccorw  
                hwpen_ini
                scorw partw_sz20_scaled solepropw_sz20_scaled
        spec 3018: 
                taxbond_equal muni currency
                ownerhome rentalhome ownermort rentalmort nonmort_ini
                ccorw_equal  
                hwpen_ini
                scorw partw_sz20_scaled solepropw_sz20_scaled
                miscw
        */

        if inlist(`rankspec', 20) {
            if `year' > 1979 {
                gen penw_szz_scaled_v8 = .
            }
            else {
                gen szz_penw_pre1980_v8 = .
            }
        }

		if `year' > 2000 {
            gen pthru_pref = solepropw + s_value_avg_ebitda_hybnof_red + ///
                                 p_value_avg_ebitda_hybnof_red + missing_pship + missing_scorp
            gen pthru_equ = scorw + partw_sz20_scaled + solepropw_sz20_scaled
            gen pthru_base = solepropw_szz_sz20_scaled + scorw_szzhybnof_scaled + partw_szzhybnof_sz20_scaled
			gen hwfix_pref = taxbond_muf + taxbond_info + muni + currency
            gen hwfix_equ = taxbond + muni + currency + taxbond_mufmisc_sz
            gen hwfix_base = hwfix_pref
            gen hwfix_equal = taxbond_equal + muni + currency 
		}
		else {
			gen pthru_pref = scorw + partw + solepropw + missing_pship + missing_scorp
            gen pthru_equ = scorw + partw_sz20_scaled + solepropw_sz20_scaled
            gen pthru_base = solepropw_szz_sz20_scaled + scorw_szz_scaled + partw_szz_sz20_scaled  
			gen hwfix_pref = taxbond_cmd_3tier + muni + currency + taxbond_muf
            gen hwfix_equ = taxbond + muni + currency + taxbond_mufmisc_sz
            gen hwfix_base = hwfix_pref
            gen hwfix_equal = taxbond_equal + muni + currency 
		}

		if `year' > 1979 {
			gen hwhou_pref = ownerhome_szz + rentalhome + ownermort + rentalmort
			gen hwhou_equ = ownerhome + rentalhome + ownermort + rentalmort
			gen hwhou_base = hwhou_pref
			capture gen hwpen_pref = penw_szz_scaled_db_v8
			capture gen hwpen_pref = penw_szz_scaled 
            gen hwpen_equ = hwpen_ini
			gen hwpen_base = penw_szz_scaled_v8
		}
		else {
			gen hwhou_pref = ownerhome_ini + rentalhome + ownermort_ini + rentalmort
			gen hwhou_equ = ownerhome_ini + rentalhome + ownermort_ini + rentalmort
			gen hwhou_base = hwhou_pref
			gen hwpen_pref = szz_penw_pre1980
            gen hwpen_equ = hwpen_ini
            gen hwpen_base = szz_penw_pre1980_v8
		}

		gen hwbus_pref = (0.2 * ccorw_9010) + pthru_pref
		gen hwbus_base = (0.2 * ccorw_9010) + pthru_base
        gen hwbus_equ = (0.2 * ccorw) + pthru_equ
		gen hwequ_pref = 0.8 * ccorw_9010 
        gen hwequ_base = hwequ_pref
        gen hwequ_equ = 0.8 * ccorw

        gen ccorw_pref = ccorw_9010 
        gen ccorw_base = ccorw_pref
        gen ccorw_equ = ccorw

        * Don't have miscw for some of the new specs because they're folded into
        * taxbond per SZ16/PSZ18
        if inlist(`rankspec', 3007, 3107, 3207, 3018, 3118, 3218) {
            gen miscw_hweal`rankspec' = 0
        }

		if inlist(`rankspec', 188, 198, 212) {
			local niwspec = `rankspec' - 100
			gen miscw = miscw_hweal`niwspec'
            gen hwoth_pref = nonmort + miscw
            gen hwoth_base = nonmort_ini + miscw
		}
		else if inlist(`rankspec', 3025, 3125, 3225){
			gen miscw = miscw_hweal`rankspec'
            gen hwoth_pref = nonmort_nostu + miscw
            gen hwoth_base = nonmort_ini + miscw
            gen hwoth_equ = nonmort_ini
		}
        else if inlist(`rankspec', 3516, 3615, 3715) {
			local niwspec = `rankspec' - 500
			gen miscw = miscw_hweal`niwspec'
            gen hwoth_pref = nonmort + miscw
            gen hwoth_base = nonmort_ini + miscw
        }
		else if inlist(`rankspec', 88, 98, 112) | `rankspec' > 3000 { // Only have miscw for updated parameters
			gen miscw = miscw_hweal`rankspec'
            gen hwoth_pref = nonmort + miscw
            gen hwoth_base = nonmort_ini + miscw
            gen hwoth_equ = nonmort_ini
		}
		else if inlist(`rankspec', 118) {
			gen miscw = miscw_hweal`rankspec'
            gen hwoth_pref = nonmort + miscw
            gen hwoth_base = nonmort_ini + miscw
		}

        * Forbes categories
        if (`year' > 1981 & !inlist(`rankspec', 20, 198, 212) & `rankspec' < 3000) {
            replace hwfix_pref = hwfix_pref + forbes400_hwfix
            replace hwequ_pref = hwequ_pref + forbes400_hwequ
            replace hwbus_pref = hwbus_pref + forbes400_hwbus
            replace hwhou_pref = hwhou_pref + forbes400_hwhou
            replace hwoth_pref = hwoth_pref + forbes400_hwoth

            replace pthru_pref = pthru_pref + forbes400_pthru 
            replace ccorw_pref = ccorw_pref + forbes400_ccorw
        } 

		/***********************************************************************
			(1.3) Construct baseline income variables
		***********************************************************************/

        if ("`top0001'" == "") {

            gen inc_fix = fiint + intest
            gen inc_scorp = scorpinc2
            gen inc_div = fidiv + divest
            gen inc_kg = fikgi + kgest
            gen inc_bus = (schcincp+rylinc+partpinc2)
            gen inc_fiscal = fninc 
            gen inc_pen = peninc
            gen inc_ini = ptinc

        }

		/***********************************************************************
			(1.4) Generate year variable
		***********************************************************************/

		gen year = `year'

		/***********************************************************************
			(1.5) Create mutually exclusive groups
		***********************************************************************/

        if ("`top0001'" == "") {
            count
            if (`r(N)' == 6) {
                intermediate_levels6 `rankspec'
            }
            else if (`r(N)' == 5) {
                intermediate_levels5 `rankspec'
            }
        }

		tempfile tax`year'
		save `tax`year''

	}

	clear

	/***************************************************************************
		(2) Append data files in memory
	***************************************************************************/

	forv year = `startyr'/`endyr' {
		append using `tax`year''
	}

	order group year
	ds group year, not
	order `r(varlist)', alphabetic last

	compress

	di "Data in memory"

end/*%>*/

capture program drop load_preferred_taxdata/*%<*/
program define load_preferred_taxdata
    syntax, earlyspec(int) midspec(int) latespec(int) [units(string) top0001(string)]

    /***************************************************************************
        (0) Parse program inputs
    ***************************************************************************/

    if "`units'" != "" & !inlist("`units'", "tu", "es") {
        di as error "Invalid split level; must be ES or TU"
        exit
    }
    else if "`units'" == "" {
        di "No units specified; assuming individuals (equal split)"
        local units = "es"
    }

    /***************************************************************************
        (1) Load in data
    ***************************************************************************/

    tempfile early mid

    load_taxdata, rankspec(`earlyspec') units("`units'") startyr(1966) endyr(1979) top0001("`top0001'")
    qui save `early'

    load_taxdata, rankspec(`midspec') units("`units'") startyr(1980) endyr(2000) top0001("`top0001'")
    qui save `mid'

    load_taxdata, rankspec(`latespec') units("`units'") startyr(2001) endyr(2016) top0001("`top0001'")

    /***************************************************************************
        (2) Append different "sections"
    ***************************************************************************/
    
    qui append using `mid'
    qui append using `early'

    /***************************************************************************
        (3) Create preferred wealth variable
    ***************************************************************************/

    #delimit ;
    qui gen hweal_preferred = cond(year > 2000, hweal`latespec',
                              cond(year > 1979, hweal`midspec',
                                                hweal`earlyspec'));

    if ("`top0001'" == "") {;
        qui gen w_group = cond(year < 1980, w`earlyspec'_group,
                          cond(year < 2001, w`midspec'_group, w`latespec'_group));
        assert !missing(w_group);
    };

    
    #delimit cr

    di "Data in memory"

    assert !missing(hweal_preferred)
end/*%>*/

capture program drop es_rank_scf/*%<*/
program define es_rank_scf
	syntax, rankvar(string) [othersplitvars(string) outname(string)]

	/***************************************************************************
		(1.0) Parse program inputs
	***************************************************************************/

	foreach varname in `rankvar' `othersplitvars' {
		capture confirm numeric variable `varname', exact
		if _rc > 0 {
			di as error "Variable `varname' not found; do not abbreviate variable names"
			exit 111
		}
	}

	foreach addtlvar in married year {
		capture confirm variable `addtlvar'
		if _rc > 0 {
			di as error "Need `addtlvar' variable"
			exit 111
		}
	}

	if regexm("`othersplitvars'", "`rankvar'") {
		di as error "Warning: ranking variable substring contained within othersplitvars"
		di as error "Ranking variable may be split twice"
	}

	/***************************************************************************
		(1.1) Make SCF rows into ``individuals:'' duplicate rows representing 
			married couples
	***************************************************************************/

	expand 2 if married == 1

	foreach splitvar of varlist `rankvar' `othersplitvars' {
		replace `splitvar' = `splitvar' / 2 if married == 1
	}

	cumul `rankvar' [aw = wgt], by(year) gen(rank)

	tempfile scf_indvls
	save `scf_indvls'

	/***************************************************************************
		(1.2) Load in counts of equal split individuals from SZ 2020 parameters 
			file
	***************************************************************************/

	import excel using $inputs/parameters.xlsx, firstrow clear

	keep if inrange(yr, 1989, 2019) & mod(yr - 1989, 3) == 0

	keep yr totadults20 
	rename yr year

	assert totadults20 < 1E6
	replace totadults20 = totadults20 * 1E3 

	sort year

	tempfile parameters
	save `parameters'

	/***************************************************************************
		(1.3) Merge parameters counts onto SCF data and adjust ranking variable
	***************************************************************************/

	use `scf_indvls', clear

	merge m:1 year using `parameters', assert(2 3) keep(3) nogen

    sort year
    egen num_ind = total(wgt), by(year) // Get total individuals according to SCF 

    /* For each observation, compute the percent of individuals in  
        the US (as represented by SCF) with wealth rank above that 
        observation */
    gen pct_ind_above = 1 - rank

    /* Now use this to compute number of households with wealth rank  
        above that observation */
    gen num_ind_above = pct_ind_above * num_ind

    /* Divide the number of HHs in US with greater rank by total  
        number of tax units in that year minus 400 TUs. */
    gen pct_ind_above_taxdata = num_ind_above / (totadults20 - 400)

    /* The rank of the observation among all tax units (excluding the 
        Forbes 400) is one minus the percent of non-Forbes 400 tax units 
        holding greater wealth than the observation */
    if "`outname'" == "" {
    	di "No output name specified; naming adjusted rank es_rank"
    	gen es_rank = 1 - pct_ind_above_taxdata
    }
    else {
    	gen `outname' = 1 - pct_ind_above_taxdata	
    }
    drop num_ind pct_ind_above num_ind_above pct_ind_above_taxdata totadults20 rank

end/*%>*/

capture program drop tu_rank_scf/*%<*/
program define tu_rank_scf

	syntax, rankvar(string) [outname(string)]

	/***************************************************************************
		(1.0) Parse program inputs
	***************************************************************************/

	capture confirm numeric variable `rankvar', exact
	if _rc > 0 {
		di as error "Variable `rankvar' not found; do not abbreviate variable names"
		exit 11
	}

	capture confirm variable year
	if _rc > 0 {
		di as error "Need year variable"
		exit 11
	}

	if "`outname'" == "" {
		di "No output name specified; naming adjusted rank tu_rank"
		local outname = "tu_rank"
	}

	/***************************************************************************
		(1.1) Calculate baseline rank among households
	***************************************************************************/

	qui cumul `rankvar' [aw = wgt], by(year) gen(rank)

	tempfile orig
	qui save `orig'

	/***************************************************************************
		(1.2) Load in counts of tax units from SZ 2020 parameters file; note 
			that main tax units count in SZ 2020 revisionists includes
			institutionalized population, which is a shift from PSZ 18 in which
			TUs are about 10M fewer per year. PSZ 2020 spreadsheets still have 
			original series in DataIncome.
	***************************************************************************/
	
	qui import excel "$inputs/PSZ2020AppendixTablesI(Aggreg).xlsx", ///
		sheet(DataIncome) cellrange(A12:ADD118) clear

	keep A ADD
	rename (A ADD) (yr tottaxunits)

	/* Use year that represents SCF sampling frame, which is filing year prior to 
		sampling year; we follow SZ 2016 here (see scf(SZ).do line 359). */	
	gen new_yr = yr + 1
	drop yr
	rename new_yr yr

	qui keep if inrange(yr, 1989, 2019) & mod(yr - 1989, 3) == 0

	keep yr tottaxunits 
	rename yr year

	assert tottaxunits < 1E6
	qui replace tottaxunits = tottaxunits * 1E3 

	sort year

	tempfile parameters
	qui save `parameters'

	/***************************************************************************
		(1.3) Merge parameters counts onto SCF data and adjust ranking variable
	***************************************************************************/
	
	use `orig', clear

 	qui merge m:1 year using `parameters', assert(2 3) keep(3) nogen
	
    sort year
    qui egen num_hh = total(wgt), by(year) // Get total HHs according to SCF 

    /* Try using SZ's method for getting total weights instead; ensure that totals match 
    	across methods */
    qui gen N = 1
    qui gen num_hh_alt = 0

    qui levelsof year, local(years) clean
    foreach year of numlist `years' {
    	qui summ N [w = wgt] if year == `year'
    	qui replace num_hh_alt = r(sum) if year == `year'
    }
    assert num_hh == num_hh_alt

    /* For each observation, compute the percent of HHs in the US (as represented
    	by SCF) with wealth rank above observation */
    qui gen pct_hh_above = 1 - rank

    * Now use this to compute number of HHs with wealth rank above observation
    qui gen num_hh_above = pct_hh_above * num_hh

    /* Divide the number of HHs in US with greater rank by total  
        number of TUs in that year minus 400 TUs (SCF doesn't cover Forbes). */
    qui gen pct_tu_above_taxdata = num_hh_above / (tottaxunits /* - 400 */)

    /* The rank of the observation among all TUs (excluding the Forbes 400) is 
    	one minus the percent of non-Forbes 400 HHs holding greater wealth than 
    	the observation */
	qui gen `outname' = 1 - pct_tu_above_taxdata	
    
    * Check that this matches the much shorter formula in SZ 2016 code
    gen check = 1 - (1 - rank) * num_hh / tottaxunits
    assert abs(`outname' - check) < 1E-5

    drop num_hh pct_hh_above num_hh_above pct_tu_above_taxdata tottaxunits rank ///
    	check N num_hh_alt

end/*%>*/

capture program drop intermediate_levels5/*%<*/
program define intermediate_levels5
	args ranking
	
	drop if regexm(group, "aires")
	count
	assert `r(N)' == 5

	assert group[1] == "All"
	assert group[2] == "P90-100"
	assert group[3] == "P99-100"
	assert group[4] == "P99.9-100"
	assert group[5] == "P99.99-100" 

	set obs `=_N+5'
	capture confirm variable w`ranking'_group
	if _rc != 0 {
		gen w`ranking'_group = _n
	}
	else {
		replace w`ranking'_group = _n if missing(w`ranking'_group)
	}

	replace group = "P0-90" in `=_N-4'	
	replace group = "P90-99" in `=_N-3'
	replace group = "P99-99.9" in `=_N-2'
	replace group = "P99.9-99.99" in `=_N-1'
	replace group = "P99.99-100" in `=_N'
	
	sort w`ranking'_group
	capture confirm variable threshold 
	if _rc == 0 {
		ds *group year threshold, not
	}
	else {
		ds *group year, not
	}
	foreach var of varlist `r(varlist)' {
		replace `var'  = `var'[1] - `var'[2] in `=_N-4'
		replace `var'  = `var'[2] - `var'[3] in `=_N-3'
		replace `var'  = `var'[3] - `var'[4] in `=_N-2'
		replace `var'  = `var'[4] - `var'[5] in `=_N-1'
		replace `var'  = `var'[5] in `=_N'
	}
	sort w`ranking'_group
	replace year = year[1] if mi(year)

end/*%>*/

capture program drop intermediate_levels6/*%<*/
program define intermediate_levels6
	args ranking
	
	drop if regexm(group, "aires")
	count
	assert `r(N)' == 6

	assert group[1] == "All"
	assert group[2] == "P90-100"
	assert group[3] == "P99-100"
	assert group[4] == "P99.9-100"
	assert group[5] == "P99.99-100" 
	assert group[6] == "P99.999-100" 

	set obs `=_N+5'
	capture confirm variable w`ranking'_group
	if _rc != 0 {
		gen w`ranking'_group = _n
	}
	else {
        replace w`ranking'_group = 6 if w`ranking'_group == 9
		replace w`ranking'_group = _n if missing(w`ranking'_group)
	}

	replace group = "P0-90" in `=_N-4'	
	replace group = "P90-99" in `=_N-3'
	replace group = "P99-99.9" in `=_N-2'
	replace group = "P99.9-99.99" in `=_N-1'
	replace group = "P99.99-99.999" in `=_N'
	
	sort w`ranking'_group
	capture confirm variable threshold 
	if _rc == 0 {
		ds *group year threshold, not
	}
	else {
		ds *group year, not
	}
	foreach var of varlist `r(varlist)' {
		replace `var'  = `var'[1] - `var'[2] in `=_N-4'
		replace `var'  = `var'[2] - `var'[3] in `=_N-3'
		replace `var'  = `var'[3] - `var'[4] in `=_N-2'
		replace `var'  = `var'[4] - `var'[5] in `=_N-1'
		replace `var'  = `var'[5] - `var'[6] in `=_N'
	}
	sort w`ranking'_group
	replace year = year[1] if mi(year)

end/*%>*/

capture program drop make_summary_wealth_table/*%<*/
program define make_summary_wealth_table
	
	/***************************************************************************
		(0) Define inputs
	***************************************************************************/

	syntax, ranking(int) sz_wlth_defn(int) preferred_wlth_defn(int) namesuffix(string) year(int)

	/***************************************************************************
		(1) Check that inputs are valid
	***************************************************************************/

	if !inrange(`sz_wlth_defn', 1, 33) | !inrange(`preferred_wlth_defn', 1, 112) ///
		| !inrange(`ranking', 1, 112) {

		di as error "One of wealth definitions/ranking out of range; choose 1-33"

		exit
	} 

	if !inlist(`year', 2014, 2016) {
		di as error "Year selection out of range; choose between 2014 and 2016"

		exit
	}

	if strlen("`namesuffix'") > 25 {
		di as error "Name suffix too long; must be fewer than 25 characters"

		exit
	}

	/***************************************************************************
		(2) Drop unnecessary wealth groups (millionaires, 50M millionaires, etc)
			and unnecessary year observations. Ensure 10 wealth groups after 
			dropping these wealth groups. 
	***************************************************************************/

	/* Important to drop these rows so as not to mess with future code that 
		depends on number of lines of code */
	qui drop if regexm(group, "aires")
	qui keep if year == `year'

	qui count 
	assert `r(N)' == 10

	/***************************************************************************
		(3) Drop unnecessary variables
	***************************************************************************/

	keep group w`ranking'_group n threshold hweal`sz_wlth_defn' hweal`preferred_wlth_defn'

	/***************************************************************************
		(4) Compute wealth shares and average wealth in groups
	***************************************************************************/

	foreach var in hweal`sz_wlth_defn' hweal`preferred_wlth_defn' {
		qui gen sh_`var'  = 100 * (`var' / `var'[1])
	}

	/* Mean wealth (note: after this step, hweal`spec' is NOW A MEAN and NOT THE
		 TOTAL) */
	foreach var in hweal`sz_wlth_defn' hweal`preferred_wlth_defn' {
		qui replace `var' = (`var' / n)
	}

	/***************************************************************************
		(5) Reformat variables
	***************************************************************************/

	qui replace threshold = round(threshold, 10^3)
	qui replace threshold = . if group == "All"

	sort w`ranking'_group

	forv i = 2 / 5 {
		local j = `i' + 5
		qui replace threshold = threshold[`i'] in `j'
	}

	* label income groups
	qui gen lab = cond(group == "All", "Full population", ///
				cond(group == "P90-100", "Top 10\%", ///
				cond(group == "P99-100", "Top 1\%", ///
				cond(group == "P99.9-100", "Top 0.1\%", ///
				cond(group == "P99.99-100", "Top 0.01\%", ///
				cond(group == "P0-90", "Bottom 90\%", ///
				cond(group == "P90-99", "Top 10-1\%", ///
				cond(group == "P99-99.9", "Top 1-0.1\%", "Top 0.1-0.01\%"))))))))
	
	foreach wlthspec in `sz_wlth_defn' `preferred_wlth_defn' {
		qui recast double hweal`wlthspec'
		qui replace hweal`wlthspec' = round(hweal`wlthspec', 1000)
	}

	rename w`ranking'_group w_group

	* formatting
	qui recast double n
	qui replace n = round(n, 100)

	qui format threshold* hweal`sz_wlth_defn' hweal`preferred_wlth_defn' n* %12.0fc
	qui format sh* %12.1f

	/* Converting to strings, setting blanks properly, and adding dollar signs 
		in LaTeX-readable format */
	qui tostring threshold* hweal* sh*, replace usedisplayformat force
	qui replace threshold = "" if threshold == "."

	foreach dollarvar in threshold hweal`sz_wlth_defn' hweal`preferred_wlth_defn' {
		qui replace `dollarvar' = " \\$" + `dollarvar' if `dollarvar' != ""
	}

	qui replace sh_hweal`sz_wlth_defn' = sh_hweal`sz_wlth_defn' + "\%"
	qui replace sh_hweal`preferred_wlth_defn' = sh_hweal`preferred_wlth_defn' + "\%"

	/***************************************************************************
		(6) Prepare to make LaTeX output via listtex
	***************************************************************************/

	#delimit ;
	qui gen tab = "\begin{tabular}{l L{2.5cm} L{2.5cm} L{2.75cm} L{2.75cm} c 
				C{2.75cm} C{2.75cm} }" in 1;
	#delimit cr

	qui gen hline = "\hline" in 1
	qui gen top = "\toprule" in 1
	qui gen mid = "\midrule" in 1 
	qui gen cline = "\cline{4-5} \cline{4-5} \cline{7-8} " in 1 
	qui gen bot = "\bottomrule" in 1
	qui gen end = "\end{tabular}" in 1

	qui gen c = ""

	#delimit ;
	qui gen title1 = "Wealth group & Count & Threshold & \multicolumn{2}{c}{Average wealth} 
					& &  \multicolumn{2}{c}{Wealth share}" in 1;
	qui gen title2 = " & & & Equal Return & Preferred & & Equal Return & Preferred ";
	qui gen panelA = "\multicolumn{8}{c}{ \textit{Panel A. Top wealth groups}} " in 1;
	qui gen panelB = "\multicolumn{8}{c}{ \textit{Panel B. Intermediate wealth groups}} " in 1;
	#delimit cr

	/***************************************************************************
		(7) Output table to file
	***************************************************************************/

	local tabname = "SZtab1_`namesuffix'"

	*Output
	qui listtex tab if _n == 1 using "`tabname'.tex", replace rstyle(none)
	qui listtex top if _n == 1, appendto("`tabname'.tex") rstyle(none)
	qui listtex title1 if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
	qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)
	qui listtex title2 if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
	qui listtex mid if _n==1 , appendto("`tabname'.tex") rstyle(tabular)	

	* PANEL A
	qui listtex c if _n==1, appendto("`tabname'.tex") rstyle(tabular)	
	qui listtex panelA if _n==1 , appendto("`tabname'.tex") rstyle(tabular)
	qui listtex c if _n==1, appendto("`tabname'.tex") rstyle(tabular)	
	qui listtex lab n`rank' threshold`rank' hweal`sz_wlth_defn' ///
		hweal`preferred_wlth_defn' c sh_hweal`sz_wlth_defn' ///
		sh_hweal`preferred_wlth_defn' if w_group <= 5, ///
			appendto("`tabname'.tex") rstyle(tabular)

	* PANEL B
	qui listtex c if _n==1, appendto("`tabname'.tex") rstyle(tabular)	
	qui listtex c if _n==1, appendto("`tabname'.tex") rstyle(tabular)	
	qui listtex panelB if _n==1 , appendto("`tabname'.tex") rstyle(tabular)
	qui listtex c if _n==1, appendto("`tabname'.tex") rstyle(tabular)	
	qui listtex lab n`rank' threshold`rank' hweal`sz_wlth_defn' ///
		hweal`preferred_wlth_defn' c sh_hweal`sz_wlth_defn' ///
		sh_hweal`preferred_wlth_defn' if w_group > 5, ///
			appendto("`tabname'.tex") rstyle(tabular)

	qui listtex bot if _n == 1, appendto("`tabname'.tex") rstyle(none)	
	qui listtex end if _n == 1, appendto("`tabname'.tex") rstyle(none)

end/*%>*/

capture program drop adjust_inflation/*%<*/
program define adjust_inflation

syntax varlist , year(real) [month_3letter_or_annual(string) includestates(string)]
/*******************************************************************************
	OWNER: STEPHANIE KESTELMAN
	DATE: AUG 4, 2017


	THIS PROGRAM ADJUSTS FOR INFLATION AT THE STATE LEVEL, and TAKES IN THE 
		FOLLOWING INPUTS:
		
		* YEAR WE WANT $S IN 
		* MONTH THE DATA IS IN. IF ANNUAL, WRITE ANNUAL
		* WHICH STATES TO INCLUDE. PROGRAM CAN BE MODIFIED IF NOT AT STATE LEVEL. 
		YEAR RANGE: 1920-2017. TO EXPAND RANGE, DOWNLOAD DESIRED RANGE OF YEARS FROM 
		https://data.bls.gov/pdq/SurveyOutputServlet 

	Edit path below when using another file
*******************************************************************************/



	qui{
	preserve /*Preserves data being adjusted*/
		
	* ADJUST FILE PATH AS NEEDED
	import excel using $inputs/CPI_1920_2018_20190305.xlsx, ///
		cellrange(A12:P110) firstrow clear
	
	if "`month_3letter_or_annual'"==""{
		local month_3letter_or_annual = "annual"
	}
	
	local month = proper("`month_3letter_or_annual'")
	
	rename (Year `month') (year `month_3letter_or_annual')
	
	keep year `month_3letter_or_annual'
	
	*Get CPI for year we want to convert all other dollars into. E.g. if want to 
	*convert to 2014 dollars, select 2014. Calculate inflation rate + 1
	ds year, not
	foreach var in `r(varlist)'{
		g `var'base = `var' if year==`year'
		egen `var'_`year' = max(`var'base)
		g  `var'_inflation =`var'_`year'/`var'
	}
	drop *base *_`year'
	
	tempfile inflation
	save `inflation'
	restore /*Restores data being adjusted*/
	
	merge m:1 year using `inflation', keep(3) nogen

	*ADJUST FOR INFLATION IF IN THE LIST OF STATES TO INCLUDE. Some states conduct 
	*their data gathering in different months, so the inflation rate is slightly different. 
	*If all the same, can comment out the portion "if regexm("`includestates'", stateabbr)"
	if "`includestates'"!=""{
		foreach var in `varlist'{
			replace `var' = `month_3letter_or_annual'_inflation* `var' if regexm("`includestates'", stateabbr)
		}
	}
	
	else if "`includestates'"==""{
		foreach var in `varlist'{
			capture replace `var' = `month_3letter_or_annual'_inflation* `var'
			if _rc==0{
			di as error "Adjusted `var' to `year' dollars"
			}
		}
	}
	drop `month_3letter_or_annual' `month_3letter_or_annual'_inflation
	
	
	}
end/*%>*/

capture program drop setup_compustat_variables/*%<*/
program define setup_compustat_variables

    ****************************************************************************
    * Compustat valuation data.%<
    ****************************************************************************
    make_cstat_20190610
    use "$inputs/compustat_profits_20190610.dta", clear

    keep fyear conm gvkey standard_naics Q_bsw mktval_equity sale at ceq pi ppent profits ebit ebitd

    gen naics_4d = floor(standard_naics/100)
    fix_naics

    #delimit ;

    gen size_bucketH = cond(sale < 10, 5,
                        cond(sale < 100, 6, 7));
    #delimit cr
    gen late = fyear > 2000 
    bys gvkey late: egen size_bucket = max(size_bucketH)

    gen mktval = Q_bsw * at
    keep if mktval > 0

    gen multiple_assets = Q_bsw
    gen multiple_capital = Q_bsw * at / ppent
    gen multiple_sale = Q_bsw * at / sale
    gen multiple_ebitd = Q_bsw * at / ebitd
    gen multiple_bookeq = Q_bsw * at / ceq
    gen multiple_pretax = Q_bsw * at / pi
    gen multiple_profits = Q_bsw * at / profits

    gen eqmultiple_assets = mktval_equity / at
    gen eqmultiple_capital = mktval_equity / ppent
    gen eqmultiple_sale = mktval_equity / sale
    gen eqmultiple_ebitd = mktval_equity / ebitd
    gen eqmultiple_bookeq = mktval_equity / ceq
    gen eqmultiple_pretax = mktval_equity / pi
    gen eqmultiple_profits = mktval_equity / profits

    foreach v in "assets" "capital" "sale" "ebitd" "bookeq" "pretax" "profits" {
        trim_tails multiple_`v', level(.01)
        trim_tails eqmultiple_`v', level(.01)
    }
    drop *multiple*_ut
    drop *multiple*_w
    tempfile cstat
    save `cstat'
/*%>*/
    ****************************************************************************
    use `cstat', clear
    impose_bounds eqmultiple_assets, lower(0) upper(5)
    impose_bounds eqmultiple_sale, lower(0) upper(5)
    impose_bounds eqmultiple_capital, lower(0) upper(20)
    impose_bounds eqmultiple_ebitd, lower(0) upper(40)
    impose_bounds eqmultiple_profits, lower(0) upper(50)
    impose_bounds eqmultiple_bookeq, lower(0) upper(20)
    impose_bounds eqmultiple_pretax, lower(0) upper(50)

end/*%>*/
